Skip to content

Commit

Permalink
Fix CPU Core Count detection and Enable Parallel Shader Compilation
Browse files Browse the repository at this point in the history
This does the following things:

- Default to the runtime automatic number of threads for (pre-) compiling shaders
- Adds a distinct automatic thread count computation for pre-compilation (which has fewer other things going on
and should scale better beyond 4 cores)
- Removes the unused logical_core_count field from the CPU detection
- Changes the semantics of num_cores from the maximum addressable number of cores to the actually available CPU cores
(which is also how it was actually used)
- Updates the computation of the HTT flag now that AMD no longer lies about it for its Zen processors
  • Loading branch information
DevJPM committed Jan 2, 2021
1 parent f06e9c5 commit 4bff12f
Show file tree
Hide file tree
Showing 4 changed files with 21 additions and 35 deletions.
1 change: 0 additions & 1 deletion Source/Core/Common/CPUDetect.h
Expand Up @@ -27,7 +27,6 @@ struct CPUInfo

bool HTT = false;
int num_cores = 0;
int logical_cpu_count = 0;

bool bSSE = false;
bool bSSE2 = false;
Expand Down
41 changes: 10 additions & 31 deletions Source/Core/Common/x64CPUDetect.cpp
Expand Up @@ -4,6 +4,7 @@

#include <cstring>
#include <string>
#include <thread>

#include "Common/CPUDetect.h"
#include "Common/CommonTypes.h"
Expand Down Expand Up @@ -107,7 +108,6 @@ void CPUInfo::Detect()
// Detect family and other misc stuff.
bool ht = false;
HTT = ht;
logical_cpu_count = 1;
if (max_std_fn >= 1)
{
__cpuid(cpu_id, 0x00000001);
Expand All @@ -121,9 +121,13 @@ void CPUInfo::Detect()
// Detect AMD Zen1, Zen1+ and Zen2
if (family == 23)
bZen1p2 = true;
logical_cpu_count = (cpu_id[1] >> 16) & 0xFF;
ht = (cpu_id[3] >> 28) & 1;

// AMD CPUs before Zen faked this flag and didn't actually
// implement simultaneous multithreading (SMT; Intel calls it HTT)
// but rather some weird middle-ground between 1-2 cores
HTT = ht && (vendor == CPUVendor::Intel || family >= 23);

if ((cpu_id[3] >> 25) & 1)
bSSE = true;
if ((cpu_id[3] >> 26) & 1)
Expand Down Expand Up @@ -201,35 +205,10 @@ void CPUInfo::Detect()
bLongMode = true;
}

num_cores = (logical_cpu_count == 0) ? 1 : logical_cpu_count;

if (max_ex_fn >= 0x80000008)
{
// Get number of cores. This is a bit complicated. Following AMD manual here.
__cpuid(cpu_id, 0x80000008);
int apic_id_core_id_size = (cpu_id[2] >> 12) & 0xF;
if (apic_id_core_id_size == 0)
{
if (ht)
{
// New mechanism for modern Intel CPUs.
if (vendor == CPUVendor::Intel)
{
__cpuidex(cpu_id, 0x00000004, 0x00000000);
int cores_x_package = ((cpu_id[0] >> 26) & 0x3F) + 1;
HTT = (cores_x_package < logical_cpu_count);
cores_x_package = ((logical_cpu_count % cores_x_package) == 0) ? cores_x_package : 1;
num_cores = (cores_x_package > 1) ? cores_x_package : num_cores;
logical_cpu_count /= cores_x_package;
}
}
}
else
{
// Use AMD's new method.
num_cores = (cpu_id[2] & 0xFF) + 1;
}
}
// this should be much more reliable and easier
// than trying to get the number of cores out of the CPUID data
// ourselves
num_cores = std::max(std::thread::hardware_concurrency(), 1u);
}

// Turn the CPU info into a string we can show
Expand Down
4 changes: 2 additions & 2 deletions Source/Core/Core/Config/GraphicsSettings.cpp
Expand Up @@ -84,9 +84,9 @@ const Info<bool> GFX_WAIT_FOR_SHADERS_BEFORE_STARTING{
{System::GFX, "Settings", "WaitForShadersBeforeStarting"}, false};
const Info<ShaderCompilationMode> GFX_SHADER_COMPILATION_MODE{
{System::GFX, "Settings", "ShaderCompilationMode"}, ShaderCompilationMode::Synchronous};
const Info<int> GFX_SHADER_COMPILER_THREADS{{System::GFX, "Settings", "ShaderCompilerThreads"}, 1};
const Info<int> GFX_SHADER_COMPILER_THREADS{{System::GFX, "Settings", "ShaderCompilerThreads"}, -1};
const Info<int> GFX_SHADER_PRECOMPILER_THREADS{
{System::GFX, "Settings", "ShaderPrecompilerThreads"}, 1};
{System::GFX, "Settings", "ShaderPrecompilerThreads"}, -1};
const Info<bool> GFX_SAVE_TEXTURE_CACHE_TO_STATE{
{System::GFX, "Settings", "SaveTextureCacheToState"}, true};

Expand Down
10 changes: 9 additions & 1 deletion Source/Core/VideoCommon/VideoConfig.cpp
Expand Up @@ -198,6 +198,14 @@ static u32 GetNumAutoShaderCompilerThreads()
return static_cast<u32>(std::min(std::max(cpu_info.num_cores - 3, 1), 4));
}

static u32 GetNumAutoShaderPreCompilerThreads()
{
  // Automatic thread count for shader pre-compilation: max(cpus - 2, 1),
  // with no upper bound. Two logical cores are left for the Dolphin UI and
  // the rest of the OS; the result is not capped so that the speed-up is
  // not limited on machines with many cores.
  const int worker_count = cpu_info.num_cores - 2;
  if (worker_count < 1)
    return static_cast<u32>(1);
  return static_cast<u32>(worker_count);
}

u32 VideoConfig::GetShaderCompilerThreads() const
{
if (!backend_info.bSupportsBackgroundCompiling)
Expand All @@ -221,5 +229,5 @@ u32 VideoConfig::GetShaderPrecompilerThreads() const
if (iShaderPrecompilerThreads >= 0)
return static_cast<u32>(iShaderPrecompilerThreads);
else
return GetNumAutoShaderCompilerThreads();
return GetNumAutoShaderPreCompilerThreads();
}

0 comments on commit 4bff12f

Please sign in to comment.