Skip to content
This repository has been archived by the owner on Jan 23, 2023. It is now read-only.

Commit

Permalink
Perform PhysicalMemoryLimit check for workstation GC, refactor GetLar…
Browse files Browse the repository at this point in the history
…gestOnDieCacheSize into GetCacheSizePerLogicalCpu (#15975)

* refactor: combine GetLargestOnDieCacheSize and GetLogicalCpuCount in GetCacheSizePerLogicalCpu

* Perform PhysicalMemoryLimit check also for workstation GC
  • Loading branch information
tmds authored and Maoni0 committed Jan 29, 2018
1 parent 850a5be commit cb73944
Show file tree
Hide file tree
Showing 12 changed files with 50 additions and 259 deletions.
7 changes: 2 additions & 5 deletions src/gc/env/gcenv.os.h
Expand Up @@ -282,16 +282,13 @@ class GCToOSInterface
// Processor topology
//

// Get number of logical processors
static uint32_t GetLogicalCpuCount();

// Get size of the largest cache on the processor die
// Get size of the on die cache per logical processor
// Parameters:
// trueSize - true to return true cache size, false to return scaled up size based on
// the processor architecture
// Return:
// Size of the cache
static size_t GetLargestOnDieCacheSize(bool trueSize = true);
static size_t GetCacheSizePerLogicalCpu(bool trueSize = true);

// Get number of processors assigned to the current process
// Return:
Expand Down
24 changes: 14 additions & 10 deletions src/gc/gc.cpp
Expand Up @@ -15756,7 +15756,7 @@ void gc_heap::gc1()
size_t min_gc_size = dd_min_gc_size(dd);
// if min GC size larger than true on die cache, then don't bother
// limiting the desired size
if ((min_gc_size <= GCToOSInterface::GetLargestOnDieCacheSize(TRUE) / GCToOSInterface::GetLogicalCpuCount()) &&
if ((min_gc_size <= GCToOSInterface::GetCacheSizePerLogicalCpu(TRUE)) &&
desired_per_heap <= 2*min_gc_size)
{
desired_per_heap = min_gc_size;
Expand Down Expand Up @@ -35523,19 +35523,26 @@ size_t GCHeap::GetValidGen0MaxSize(size_t seg_size)
#ifdef SERVER_GC
// performance data seems to indicate halving the size results
// in optimal perf. Ask for adjusted gen0 size.
gen0size = max(GCToOSInterface::GetLargestOnDieCacheSize(FALSE)/GCToOSInterface::GetLogicalCpuCount(),(256*1024));
gen0size = max(GCToOSInterface::GetCacheSizePerLogicalCpu(FALSE),(256*1024));

// if gen0 size is too large given the available memory, reduce it.
// Get true cache size, as we don't want to reduce below this.
size_t trueSize = max(GCToOSInterface::GetLargestOnDieCacheSize(TRUE)/GCToOSInterface::GetLogicalCpuCount(),(256*1024));
size_t trueSize = max(GCToOSInterface::GetCacheSizePerLogicalCpu(TRUE),(256*1024));
dprintf (2, ("cache: %Id-%Id, cpu: %Id",
GCToOSInterface::GetLargestOnDieCacheSize(FALSE),
GCToOSInterface::GetLargestOnDieCacheSize(TRUE),
GCToOSInterface::GetLogicalCpuCount()));
GCToOSInterface::GetCacheSizePerLogicalCpu(FALSE),
GCToOSInterface::GetCacheSizePerLogicalCpu(TRUE)));

int n_heaps = gc_heap::n_heaps;
#else //SERVER_GC
size_t trueSize = GCToOSInterface::GetCacheSizePerLogicalCpu(TRUE);
gen0size = max((4*trueSize/5),(256*1024));
trueSize = max(trueSize, (256*1024));
int n_heaps = 1;
#endif //SERVER_GC

// if the total min GC across heaps will exceed 1/6th of available memory,
// then reduce the min GC size until it either fits or has been reduced to cache size.
while ((gen0size * gc_heap::n_heaps) > GCToOSInterface::GetPhysicalMemoryLimit() / 6)
while ((gen0size * n_heaps) > GCToOSInterface::GetPhysicalMemoryLimit() / 6)
{
gen0size = gen0size / 2;
if (gen0size <= trueSize)
Expand All @@ -35544,9 +35551,6 @@ size_t GCHeap::GetValidGen0MaxSize(size_t seg_size)
break;
}
}
#else //SERVER_GC
gen0size = max((4*GCToOSInterface::GetLargestOnDieCacheSize(TRUE)/5),(256*1024));
#endif //SERVER_GC
}

// Generation 0 must never be more than 1/2 the segment size.
Expand Down
8 changes: 1 addition & 7 deletions src/gc/unix/gcenv.unix.cpp
Expand Up @@ -221,12 +221,6 @@ void GCToOSInterface::DebugBreak()
#endif
}

// Report how many logical processors were recorded for this machine.
// Return:
//  The value currently held in g_logicalCpuCount
uint32_t GCToOSInterface::GetLogicalCpuCount()
{
    const uint32_t logicalCpus = g_logicalCpuCount;
    return logicalCpus;
}

// Causes the calling thread to sleep for the specified number of milliseconds
// Parameters:
// sleepMSec - time to sleep before switching to another thread
Expand Down Expand Up @@ -403,7 +397,7 @@ bool GCToOSInterface::GetWriteWatch(bool resetState, void* address, size_t size,
// the processor architecture
// Return:
// Size of the cache
size_t GCToOSInterface::GetLargestOnDieCacheSize(bool trueSize)
size_t GCToOSInterface::GetCacheSizePerLogicalCpu(bool trueSize)
{
    // Placeholder implementation: trueSize is ignored and 0 is always reported.
    // TODO(segilles) processor detection
    const size_t undetectedCacheSize = 0;
    return undetectedCacheSize;
}
Expand Down
9 changes: 1 addition & 8 deletions src/gc/windows/gcenv.windows.cpp
Expand Up @@ -228,13 +228,6 @@ void GCToOSInterface::DebugBreak()
::DebugBreak();
}

// Report the number of logical processors.
// Return:
//  Always 1 for now; processor detection is not implemented here yet
uint32_t GCToOSInterface::GetLogicalCpuCount()
{
    // TODO(segilles) processor detection
    const uint32_t placeholderCpuCount = 1;
    return placeholderCpuCount;
}

// Causes the calling thread to sleep for the specified number of milliseconds
// Parameters:
// sleepMSec - time to sleep before switching to another thread
Expand Down Expand Up @@ -381,7 +374,7 @@ bool GCToOSInterface::GetWriteWatch(bool resetState, void* address, size_t size,
// the processor architecture
// Return:
// Size of the cache
size_t GCToOSInterface::GetLargestOnDieCacheSize(bool trueSize)
size_t GCToOSInterface::GetCacheSizePerLogicalCpu(bool trueSize)
{
    // Placeholder implementation: trueSize is ignored and 0 is always reported.
    // TODO(segilles) processor detection (see src/vm/util.cpp:1935)
    const size_t undetectedCacheSize = 0;
    return undetectedCacheSize;
}
Expand Down
1 change: 0 additions & 1 deletion src/vm/CMakeLists.txt
Expand Up @@ -477,7 +477,6 @@ elseif(CLR_CMAKE_TARGET_ARCH_ARM)
)
elseif(CLR_CMAKE_TARGET_ARCH_ARM64)
set(VM_SOURCES_DAC_AND_WKS_ARCH
${ARCH_SOURCES_DIR}/cgenarm64.cpp
${ARCH_SOURCES_DIR}/stubs.cpp
exceptionhandling.cpp
gcinfodecoder.cpp
Expand Down
83 changes: 0 additions & 83 deletions src/vm/amd64/cgenamd64.cpp
Expand Up @@ -458,89 +458,6 @@ BOOL GetAnyThunkTarget (CONTEXT *pctx, TADDR *pTarget, TADDR *pTargetMethodDesc)
// determine the number of logical cpus, or the machine is not populated uniformly with the same
// type of processors, this function returns 1.

extern "C" DWORD __stdcall getcpuid(DWORD arg, unsigned char result[16]);

// fix this if/when AMD does multicore or SMT
//
// Returns a logical-CPU count for the current machine.
//
// The result defaults to 1 and is only raised when the vendor check below matches and one of
// GetLogicalCpuCountFromOS() / GetLogicalCpuCountFallback() returns a non-zero value.
// The computed value is stored in a function-local static, so later calls return the cached
// number without probing again.
//
// Return:
//  The cached value on repeat calls; otherwise param.retVal (1 unless a helper succeeded).
DWORD GetLogicalCpuCount()
{
// No CONTRACT possible because GetLogicalCpuCount uses SEH

STATIC_CONTRACT_THROWS;
STATIC_CONTRACT_GC_NOTRIGGER;

static DWORD val = 0;

// cache value for later re-use
if (val)
{
return val;
}

// State shared with the PAL_TRY body; retVal starts at the "unknown" answer of 1 so that
// every early exit (and the exception path) reports 1.
struct Param : DefaultCatchFilterParam
{
DWORD retVal;
} param;
param.pv = COMPLUS_EXCEPTION_EXECUTE_HANDLER;
param.retVal = 1;

PAL_TRY(Param *, pParam, &param)
{

unsigned char buffer[16];
DWORD maxCpuId = getcpuid(0, buffer);
DWORD* dwBuffer = (DWORD*)buffer;

// Nothing beyond leaf 0 is reported: keep the default of 1.
if (maxCpuId < 1)
goto qExit;

// NOTE(review): the three multi-character constants appear to spell "GenuineIntel" when
// read in the order [1], [3], [2]; this relies on the compiler's (implementation-defined)
// multi-character literal encoding and on getcpuid's buffer layout - confirm both.
if (dwBuffer[1] == 'uneG') {
if (dwBuffer[3] == 'Ieni') {
if (dwBuffer[2] == 'letn') { // get SMT/multicore enumeration for Intel EM64T


// TODO: Currently GetLogicalCpuCountFromOS() and GetLogicalCpuCountFallback() are broken on
// multi-core processor, but we never call into those two functions since we don't halve the
// gen0size when it's prescott and above processor. We keep the old version here for earlier
// generation system(Northwood based), perf data suggests on those systems, halve gen0 size
// still boost the performance(ex:Biztalk boosts about 17%). So on earlier systems(Northwood)
// based, we still go ahead and halve gen0 size. The logic in GetLogicalCpuCountFromOS()
// and GetLogicalCpuCountFallback() works fine for those earlier generation systems.
// If it's a Prescott and above processor or Multi-core, perf data suggests not to halve gen0
// size at all gives us overall better performance.
// This is going to be fixed with a new version in orcas time frame.

// Per the TODO above: skip the helpers (and report 1) when maxCpuId falls in this range.
if( (maxCpuId > 3) && (maxCpuId < 0x80000000) )
goto qExit;

val = GetLogicalCpuCountFromOS(); //try to obtain HT enumeration from OS API
if (val )
{
pParam->retVal = val; // OS API HT enumeration successful, we are Done
goto qExit;
}

val = GetLogicalCpuCountFallback(); // Fallback to HT enumeration using CPUID
if( val )
pParam->retVal = val;
}
}
}
qExit: ;
}

// The handler body is intentionally empty: param.retVal simply keeps whatever value it held
// when the exception was raised.
PAL_EXCEPT_FILTER(DefaultCatchFilter)
{
}
PAL_ENDTRY

// If neither helper produced a count, cache the default so later calls take the early return.
if (val == 0)
{
val = param.retVal;
}

return param.retVal;
}

void EncodeLoadAndJumpThunk (LPBYTE pBuffer, LPVOID pv, LPVOID pTarget)
{
CONTRACTL
Expand Down
7 changes: 0 additions & 7 deletions src/vm/arm/stubs.cpp
Expand Up @@ -3369,13 +3369,6 @@ void emitCOMStubCall (ComCallMethodDesc *pCOMMethod, PCODE target)

#ifndef CROSSGEN_COMPILE

DWORD GetLogicalCpuCount()
{
    // Defer entirely to the OS for this information (the APIs used exist on all versions of
    // Windows which support ARM).
    const DWORD cpuCountFromOS = GetLogicalCpuCountFromOS();
    return cpuCountFromOS;
}

#ifdef FEATURE_READYTORUN

//
Expand Down
38 changes: 0 additions & 38 deletions src/vm/arm64/cgenarm64.cpp

This file was deleted.

6 changes: 1 addition & 5 deletions src/vm/cgensys.h
Expand Up @@ -34,10 +34,6 @@ int CallJitEHFilter (CrawlFrame* pCf, BYTE* startPC, EE_ILEXCEPTION_CLAUSE *EHC
void CallJitEHFinally(CrawlFrame* pCf, BYTE* startPC, EE_ILEXCEPTION_CLAUSE *EHClausePtr, DWORD nestingLevel);
#endif // _TARGET_X86_


// get number of logical to physical processors. Returns 1 on failure or non-intel x86 processors.
DWORD GetLogicalCpuCount();

//These are in util.cpp
extern size_t GetLogicalProcessorCacheSizeFromOS();
extern size_t GetIntelDeterministicCacheEnum();
Expand All @@ -47,7 +43,7 @@ extern DWORD GetLogicalCpuCountFallback();


// Try to determine the largest last-level cache size of the machine, divided by the number of logical CPUs - return 0 if unknown or no L2/L3 cache
size_t GetLargestOnDieCacheSize(BOOL bTrueSize = TRUE);
size_t GetCacheSizePerLogicalCpu(BOOL bTrueSize = TRUE);


#ifdef FEATURE_COMINTEROP
Expand Down
11 changes: 2 additions & 9 deletions src/vm/gcenv.os.cpp
Expand Up @@ -145,13 +145,6 @@ void GCToOSInterface::DebugBreak()
::DebugBreak();
}

// Report the number of logical processors.
// Return:
//  Whatever the global-scope GetLogicalCpuCount() helper reports
uint32_t GCToOSInterface::GetLogicalCpuCount()
{
    LIMITED_METHOD_CONTRACT;

    const uint32_t logicalCpus = ::GetLogicalCpuCount();
    return logicalCpus;
}

// Causes the calling thread to sleep for the specified number of milliseconds
// Parameters:
// sleepMSec - time to sleep before switching to another thread
Expand Down Expand Up @@ -322,11 +315,11 @@ bool GCToOSInterface::GetWriteWatch(bool resetState, void* address, size_t size,
// the processor architecture
// Return:
// Size of the cache
size_t GCToOSInterface::GetLargestOnDieCacheSize(bool trueSize)
size_t GCToOSInterface::GetCacheSizePerLogicalCpu(bool trueSize)
{
LIMITED_METHOD_CONTRACT;

return ::GetLargestOnDieCacheSize(trueSize);
return ::GetCacheSizePerLogicalCpu(trueSize);
}

// Sets the calling thread's affinity to only run on the processor specified
Expand Down

0 comments on commit cb73944

Please sign in to comment.