Skip to content
This repository has been archived by the owner on Jan 23, 2023. It is now read-only.

Commit

Permalink
Improve detection of CPU limits when running inside a Container
Browse files Browse the repository at this point in the history
This focuses on better supporting the Docker CLI's `--cpus` parameter, which limits the amount of CPU time available to the container (e.g. 1.8 means 180% CPU time, i.e. 90% of each core on 2 cores, 45% of each core on 4 cores, etc.)

All the runtime components depending on the number of processors available are:
 - ThreadPool
 - GC
 - `Environment.ProcessorCount` via `SystemNative::GetProcessorCount`
 - `SimpleRWLock::m_spinCount`
 - `BaseDomain::m_iNumberOfProcessors` (it's used to determine the GC heap to affinitize to)

All the above components take advantage of `--cpus` via `CGroup::GetCpuLimit` with #12797, which allows them to optimize performance in a container/machine with limited resources. This makes sure the runtime components make the best use of available resources.

In the case of `Environment.ProcessorCount`, the behavior is such that passing `--cpus=1.5` on a machine with 8 processors will return `1`, as shown in https://github.com/dotnet/coreclr/issues/22302#issuecomment-459092299. This behavior is not consistent with [Windows Job Objects](https://docs.microsoft.com/en-us/windows/desktop/api/winnt/ns-winnt-jobobject_cpu_rate_control_information), which still return the number of processors for the container/machine even if it only gets a part of the total number of cycles.

This behavior is erroneous because the container still has access to the full range of processors on the machine, and only its _processor time_ is limited. For example, in the case of a 4-processor machine, with a value of `--cpus=1.8`, there can be 4 threads running in parallel even though each thread will only get `1.8 / 4 = .45` or 45% of all cycles of each processor.

The work consists of reverting the behavior of `SystemNative::GetProcessorCount` to what it was before #12797.
  • Loading branch information
luhenry committed Apr 2, 2019
1 parent e6c49f7 commit 63831df
Show file tree
Hide file tree
Showing 14 changed files with 35 additions and 23 deletions.
6 changes: 4 additions & 2 deletions src/System.Private.CoreLib/src/System/Environment.CoreCLR.cs
Original file line number Diff line number Diff line change
Expand Up @@ -83,10 +83,12 @@ public static string[] GetCommandLineArgs()
get;
}

public static int ProcessorCount => GetProcessorCount();
public static int ProcessorCount => GetProcessorCount(withCpuLimit: false);

internal static int ProcessorQuota => GetProcessorCount(withCpuLimit: true);

[DllImport(JitHelpers.QCall, CharSet = CharSet.Unicode)]
private static extern int GetProcessorCount();
private static extern int GetProcessorCount(bool withCpuLimit);

// If you change this method's signature then you must change the code that calls it
// in excep.cpp and probably you will have to visit mscorlib.h to add the new signature
Expand Down
5 changes: 3 additions & 2 deletions src/classlibnative/bcltype/system.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -317,7 +317,7 @@ FCIMPL1(ReflectMethodObject*, SystemNative::GetMethodFromStackTrace, ArrayBase*
}
FCIMPLEND

INT32 QCALLTYPE SystemNative::GetProcessorCount()
FCIMPL1(INT32, SystemNative::GetProcessorCount, CLR_BOOL withCpuLimit)
{
QCALL_CONTRACT;

Expand Down Expand Up @@ -346,14 +346,15 @@ INT32 QCALLTYPE SystemNative::GetProcessorCount()
#ifdef FEATURE_PAL
uint32_t cpuLimit;

if (PAL_GetCpuLimit(&cpuLimit) && cpuLimit < (uint32_t)processorCount)
if (withCpuLimit && PAL_GetCpuLimit(&cpuLimit) && cpuLimit < (uint32_t)processorCount)
processorCount = cpuLimit;
#endif

END_QCALL;

return processorCount;
}
FCIMPLEND

FCIMPL0(FC_BOOL_RET, SystemNative::HasShutdownStarted)
{
Expand Down
2 changes: 1 addition & 1 deletion src/classlibnative/bcltype/system.h
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@ class SystemNative
static FCDECL3(VOID, FailFastWithExceptionAndSource, StringObject* refMessageUNSAFE, ExceptionObject* refExceptionUNSAFE, StringObject* errorSourceUNSAFE);

// Returns the number of logical processors that can be used by managed code
static INT32 QCALLTYPE GetProcessorCount();
static FCDECL1(INT32, GetProcessorCount, CLR_BOOL withCpuLimit);

static FCDECL0(FC_BOOL_RET, HasShutdownStarted);
static FCDECL0(FC_BOOL_RET, IsServerGC);
Expand Down
2 changes: 1 addition & 1 deletion src/gc/env/gcenv.os.h
Original file line number Diff line number Diff line change
Expand Up @@ -301,7 +301,7 @@ class GCToOSInterface
// Get number of processors assigned to the current process
// Return:
// The number of processors
static uint32_t GetCurrentProcessCpuCount();
static uint32_t GetCurrentProcessCpuCount(bool withCpuLimit = false);

// Sets the calling thread's affinity to only run on the processor specified
// in the GCThreadAffinity structure.
Expand Down
2 changes: 1 addition & 1 deletion src/gc/gc.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -34216,7 +34216,7 @@ HRESULT GCHeap::Initialize()
// GetCurrentProcessCpuCount only returns up to 64 procs.
uint32_t nhp_from_process = GCToOSInterface::CanEnableGCCPUGroups() ?
GCToOSInterface::GetTotalProcessorCount():
GCToOSInterface::GetCurrentProcessCpuCount();
GCToOSInterface::GetCurrentProcessCpuCount(true);

if (nhp_from_config)
{
Expand Down
7 changes: 4 additions & 3 deletions src/gc/unix/cgroup.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -102,7 +102,7 @@ class CGroup
{
long long quota;
long long period;
long long cpu_count;
double cpu_count;

quota = ReadCpuCGroupValue(CFS_QUOTA_FILENAME);
if (quota <= 0)
Expand All @@ -119,10 +119,11 @@ class CGroup
return true;
}

cpu_count = quota / period;
cpu_count = (double) quota / period;
if (cpu_count < UINT32_MAX)
{
*val = cpu_count;
// round up
*val = (uint32_t)(cpu_count + 0.999999999);
}
else
{
Expand Down
4 changes: 2 additions & 2 deletions src/gc/unix/gcenv.unix.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -526,7 +526,7 @@ bool GCToOSInterface::GetCurrentProcessAffinityMask(uintptr_t* processAffinityMa
// Get number of processors assigned to the current process
// Return:
// The number of processors
uint32_t GCToOSInterface::GetCurrentProcessCpuCount()
uint32_t GCToOSInterface::GetCurrentProcessCpuCount(bool withCpuLimit)
{
uintptr_t pmask, smask;
uint32_t cpuLimit;
Expand All @@ -553,7 +553,7 @@ uint32_t GCToOSInterface::GetCurrentProcessCpuCount()
if (count == 0 || count > 64)
count = 64;

if (GetCpuLimit(&cpuLimit) && cpuLimit < count)
if (withCpuLimit && GetCpuLimit(&cpuLimit) && cpuLimit < count)
count = cpuLimit;

return count;
Expand Down
2 changes: 1 addition & 1 deletion src/gc/windows/gcenv.windows.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -894,7 +894,7 @@ bool GCToOSInterface::GetCurrentProcessAffinityMask(uintptr_t* processMask, uint
// Get number of processors assigned to the current process
// Return:
// The number of processors
uint32_t GCToOSInterface::GetCurrentProcessCpuCount()
uint32_t GCToOSInterface::GetCurrentProcessCpuCount(bool withCpuLimit)
{
static int cCPUs = 0;

Expand Down
2 changes: 1 addition & 1 deletion src/inc/utilcode.h
Original file line number Diff line number Diff line change
Expand Up @@ -1412,7 +1412,7 @@ class CPUGroupInfo
}
};

int GetCurrentProcessCpuCount();
int GetCurrentProcessCpuCount(bool withCpuLimit = false);
DWORD_PTR GetCurrentProcessCpuMask();

uint32_t GetOsPageSize();
Expand Down
9 changes: 5 additions & 4 deletions src/pal/src/misc/cgroup.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,7 @@ class CGroup
{
long long quota;
long long period;
long long cpu_count;
double cpu_count;

quota = ReadCpuCGroupValue(CFS_QUOTA_FILENAME);
if (quota <= 0)
Expand All @@ -106,11 +106,12 @@ class CGroup
*val = 1;
return true;
}
cpu_count = quota / period;

cpu_count = (double) quota / period;
if (cpu_count < UINT_MAX)
{
*val = cpu_count;
// round up
*val = (UINT)(cpu_count + 0.999999999);
}
else
{
Expand Down
6 changes: 6 additions & 0 deletions src/pal/src/thread/process.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2524,6 +2524,12 @@ PAL_GetCPUBusyTime(
{
return 0;
}

UINT cpuLimit;
if (PAL_GetCpuLimit(&cpuLimit) && cpuLimit < dwNumberOfProcessors)
{
dwNumberOfProcessors = cpuLimit;
}
}

if (getrusage(RUSAGE_SELF, &resUsage) == -1)
Expand Down
4 changes: 2 additions & 2 deletions src/utilcode/util.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1177,7 +1177,7 @@ DWORD LCM(DWORD u, DWORD v)
//******************************************************************************
// Returns the number of processors that a process has been configured to run on
//******************************************************************************
int GetCurrentProcessCpuCount()
int GetCurrentProcessCpuCount(bool withCpuLimit)
{
CONTRACTL
{
Expand Down Expand Up @@ -1222,7 +1222,7 @@ int GetCurrentProcessCpuCount()
#ifdef FEATURE_PAL
uint32_t cpuLimit;

if (PAL_GetCpuLimit(&cpuLimit) && cpuLimit < count)
if (withCpuLimit && PAL_GetCpuLimit(&cpuLimit) && cpuLimit < count)
count = cpuLimit;
#endif

Expand Down
4 changes: 2 additions & 2 deletions src/vm/gcenv.os.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -396,11 +396,11 @@ bool GCToOSInterface::GetCurrentProcessAffinityMask(uintptr_t* processMask, uint
// Get number of processors assigned to the current process
// Return:
// The number of processors
uint32_t GCToOSInterface::GetCurrentProcessCpuCount()
uint32_t GCToOSInterface::GetCurrentProcessCpuCount(bool withCpuLimit)
{
LIMITED_METHOD_CONTRACT;

return ::GetCurrentProcessCpuCount();
return ::GetCurrentProcessCpuCount(withCpuLimit);
}

// Return the size of the user-mode portion of the virtual address space of this process.
Expand Down
3 changes: 2 additions & 1 deletion src/vm/simplerwlock.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -148,7 +148,8 @@ class SimpleRWLock
} CONTRACTL_END;

m_RWLock = 0;
m_spinCount = (GetCurrentProcessCpuCount() == 1) ? 0 : 4000;
// Passing false here reduces ASP.NET Core Plaintext benchmark results from 1.2M to 0.8M RPS.
m_spinCount = (GetCurrentProcessCpuCount(true) == 1) ? 0 : 4000;
m_WriterWaiting = FALSE;

#ifdef _DEBUG
Expand Down

0 comments on commit 63831df

Please sign in to comment.