diff --git a/src/coreclr/gc/unix/gcenv.unix.cpp b/src/coreclr/gc/unix/gcenv.unix.cpp index ceadbf1af995b..4bda2dbb2020b 100644 --- a/src/coreclr/gc/unix/gcenv.unix.cpp +++ b/src/coreclr/gc/unix/gcenv.unix.cpp @@ -915,30 +915,33 @@ static size_t GetLogicalProcessorCacheSizeFromOS() #endif #if defined(HOST_ARM64) && !defined(TARGET_OSX) - if (cacheSize == 0) - { - // It is currently expected to be missing cache size info - // - // _SC_LEVEL*_*CACHE_SIZE is not yet present. Work is in progress to enable this for arm64 - // - // /sys/devices/system/cpu/cpu*/cache/index*/ is also not yet present in most systems. - // Arm64 patch is in Linux kernel tip. - // - // midr_el1 is available in "/sys/devices/system/cpu/cpu0/regs/identification/midr_el1", - // but without an exhaustive list of ARM64 processors any decode of midr_el1 - // Would likely be incomplete - - // Published information on ARM64 architectures is limited. - // If we use recent high core count chips as a guide for state of the art, we find - // total L3 cache to be 1-2MB/core. As always, there are exceptions. - - // Estimate cache size based on CPU count - // Assume lower core count are lighter weight parts which are likely to have smaller caches - // Assume L3$/CPU grows linearly from 256K to 1.5M/CPU as logicalCPUs grows from 2 to 12 CPUs - DWORD logicalCPUs = g_totalCpuCount; - - cacheSize = logicalCPUs * std::min(1536, std::max(256, (int)logicalCPUs * 128)) * 1024; - } + // It is currently expected to be missing cache size info + // + // _SC_LEVEL*_*CACHE_SIZE is not yet present. Work is in progress to enable this for arm64 + // + // /sys/devices/system/cpu/cpu*/cache/index*/ is also not yet present in most systems. + // Arm64 patch is in Linux kernel tip. 
+ // + // midr_el1 is available in "/sys/devices/system/cpu/cpu0/regs/identification/midr_el1", + // but without an exhaustive list of ARM64 processors any decode of midr_el1 + // Would likely be incomplete + + // Published information on ARM64 architectures is limited. + // If we use recent high core count chips as a guide for state of the art, we find + // total L3 cache to be 1-2MB/core. As always, there are exceptions. + + // Estimate cache size based on CPU count + // Assume lower core count are lighter weight parts which are likely to have smaller caches + // Assume shared L3 grows linearly from 256KB to 4MB as logicalCPUs grows from 2 to 32 CPUs + + // As of 2022, in most cases /sys/devices/system/cpu/cpu*/cache/index*/ is present, but only + // reports L2 cache size and says nothing about L3 even if it exists. In this case we don't want + // to be stuck with L2 (e.g. 256KB on our test machine whereas the real L3 is 32MB) + // More details: https://github.com/dotnet/runtime/issues/60166 + DWORD logicalCPUs = GCToOSInterface::GetTotalProcessorCount(); + + size_t predictedSize = std::min(4096, std::max(256, (int)logicalCPUs*128))*1024; + cacheSize = std::max(predictedSize, cacheSize); #endif #if HAVE_SYSCTLBYNAME diff --git a/src/coreclr/gc/windows/gcenv.windows.cpp b/src/coreclr/gc/windows/gcenv.windows.cpp index 10d3128de2071..5c80881b47d65 100644 --- a/src/coreclr/gc/windows/gcenv.windows.cpp +++ b/src/coreclr/gc/windows/gcenv.windows.cpp @@ -458,6 +458,21 @@ size_t GetLogicalProcessorCacheSizeFromOS() if(pslpi) delete[] pslpi; // release the memory allocated for the SLPI array. + +#ifdef TARGET_ARM64 + // GetLogicalProcessorInformation doesn't report L3 cache size on our win-arm64 environment (current cache_size most + // likely represents L2 instead). We're going to use a processor-count based heuristic to predict its size and pick + // whatever is bigger. The same heuristic is used for Linux-arm64. 
+ // More info: https://github.com/dotnet/runtime/issues/60166 + uint32_t logicalCPUs = GCToOSInterface::GetTotalProcessorCount(); + + // Estimate cache size based on CPU count + // Assume lower core count are lighter weight parts which are likely to have smaller caches + // Assume shared L3 grows linearly from 256KB to 4MB as logicalCPUs grows from 2 to 32 CPUs + size_t predictedSize = min(4096, max(256, logicalCPUs * 128)) * 1024; + cache_size = max(predictedSize, cache_size); +#endif + return cache_size; } diff --git a/src/coreclr/pal/src/misc/sysinfo.cpp b/src/coreclr/pal/src/misc/sysinfo.cpp index 19f9c86fd451c..4c131339e9fe1 100644 --- a/src/coreclr/pal/src/misc/sysinfo.cpp +++ b/src/coreclr/pal/src/misc/sysinfo.cpp @@ -582,30 +582,33 @@ PAL_GetLogicalProcessorCacheSizeFromOS() #endif #if defined(HOST_ARM64) && !defined(TARGET_OSX) - if (cacheSize == 0) - { - // It is currently expected to be missing cache size info - // - // _SC_LEVEL*_*CACHE_SIZE is not yet present. Work is in progress to enable this for arm64 - // - // /sys/devices/system/cpu/cpu*/cache/index*/ is also not yet present in most systems. - // Arm64 patch is in Linux kernel tip. - // - // midr_el1 is available in "/sys/devices/system/cpu/cpu0/regs/identification/midr_el1", - // but without an exhaustive list of ARM64 processors any decode of midr_el1 - // Would likely be incomplete - - // Published information on ARM64 architectures is limited. - // If we use recent high core count chips as a guide for state of the art, we find - // total L3 cache to be 1-2MB/core. As always, there are exceptions. 
- - // Estimate cache size based on CPU count - // Assume lower core count are lighter weight parts which are likely to have smaller caches - // Assume L3$/CPU grows linearly from 256K to 1.5M/CPU as logicalCPUs grows from 2 to 12 CPUs - DWORD logicalCPUs = PAL_GetLogicalCpuCountFromOS(); - - cacheSize = logicalCPUs*std::min(1536, std::max(256, (int)logicalCPUs*128))*1024; - } + // It is currently expected to be missing cache size info + // + // _SC_LEVEL*_*CACHE_SIZE is not yet present. Work is in progress to enable this for arm64 + // + // /sys/devices/system/cpu/cpu*/cache/index*/ is also not yet present in most systems. + // Arm64 patch is in Linux kernel tip. + // + // midr_el1 is available in "/sys/devices/system/cpu/cpu0/regs/identification/midr_el1", + // but without an exhaustive list of ARM64 processors any decode of midr_el1 + // Would likely be incomplete + + // Published information on ARM64 architectures is limited. + // If we use recent high core count chips as a guide for state of the art, we find + // total L3 cache to be 1-2MB/core. As always, there are exceptions. + + // Estimate cache size based on CPU count + // Assume lower core count are lighter weight parts which are likely to have smaller caches + // Assume shared L3 grows linearly from 256KB to 4MB as logicalCPUs grows from 2 to 32 CPUs + + // As of 2022, in most cases /sys/devices/system/cpu/cpu*/cache/index*/ is present, but only + // reports L2 cache size and says nothing about L3 even if it exists. In this case we don't want + // to be stuck with L2 (e.g. 256KB on our test machine whereas the real L3 is 32MB) + // More details: https://github.com/dotnet/runtime/issues/60166 + DWORD logicalCPUs = PAL_GetLogicalCpuCountFromOS(); + + size_t predictedSize = std::min(4096, std::max(256, (int)logicalCPUs*128))*1024; + cacheSize = std::max(predictedSize, cacheSize); #endif #if HAVE_SYSCTLBYNAME