Skip to content

Commit

Permalink
VideoBackend/OGL: Prefer KHR_subgroup over NV_shader_thread.
Browse files Browse the repository at this point in the history
While the NV extension is totally fine, the KHR extension should be supported by more hardware.

For NVIDIA, the hardware either supports both or neither; it just needs a driver from the last two years.
For AMD, the drivers from late 2022-12 seem to bring support for the KHR extension.
For Intel, the KHR extension has also been supported for some years.
  • Loading branch information
degasus committed Feb 1, 2023
1 parent 2a2ee5d commit de4987a
Show file tree
Hide file tree
Showing 8 changed files with 65 additions and 33 deletions.
1 change: 1 addition & 0 deletions Source/Core/Common/GL/GLExtensions/GLExtensions.h
Expand Up @@ -37,6 +37,7 @@
#include "Common/GL/GLExtensions/EXT_texture_filter_anisotropic.h"
#include "Common/GL/GLExtensions/HP_occlusion_test.h"
#include "Common/GL/GLExtensions/KHR_debug.h"
#include "Common/GL/GLExtensions/KHR_shader_subgroup.h"
#include "Common/GL/GLExtensions/NV_depth_buffer_float.h"
#include "Common/GL/GLExtensions/NV_occlusion_query_samples.h"
#include "Common/GL/GLExtensions/NV_primitive_restart.h"
Expand Down
19 changes: 19 additions & 0 deletions Source/Core/Common/GL/GLExtensions/KHR_shader_subgroup.h
@@ -0,0 +1,19 @@
/*
** Copyright (c) 2013-2015 The Khronos Group Inc.
** SPDX-License-Identifier: MIT
*/

#include "Common/GL/GLExtensions/gl_common.h"

// Enum tokens for the GL_KHR_shader_subgroup extension.
//
// Implementation-state query tokens. GL_SUBGROUP_SUPPORTED_FEATURES_KHR is
// read with glGetIntegerv and yields a bitmask of the
// GL_SUBGROUP_FEATURE_*_BIT_KHR flags defined below.
// NOTE(review): the other three tokens are presumably queried the same way;
// confirm against the KHR_shader_subgroup specification before relying on it.
#define GL_SUBGROUP_SIZE_KHR 0x9532
#define GL_SUBGROUP_SUPPORTED_STAGES_KHR 0x9533
#define GL_SUBGROUP_SUPPORTED_FEATURES_KHR 0x9534
#define GL_SUBGROUP_QUAD_ALL_STAGES_KHR 0x9535
// Feature bit flags, one distinct bit per subgroup feature set. These are
// tested against the value returned for GL_SUBGROUP_SUPPORTED_FEATURES_KHR
// to find out which groups of subgroup operations the driver exposes.
#define GL_SUBGROUP_FEATURE_BASIC_BIT_KHR 0x00000001
#define GL_SUBGROUP_FEATURE_VOTE_BIT_KHR 0x00000002
#define GL_SUBGROUP_FEATURE_ARITHMETIC_BIT_KHR 0x00000004
#define GL_SUBGROUP_FEATURE_BALLOT_BIT_KHR 0x00000008
#define GL_SUBGROUP_FEATURE_SHUFFLE_BIT_KHR 0x00000010
#define GL_SUBGROUP_FEATURE_SHUFFLE_RELATIVE_BIT_KHR 0x00000020
#define GL_SUBGROUP_FEATURE_CLUSTERED_BIT_KHR 0x00000040
#define GL_SUBGROUP_FEATURE_QUAD_BIT_KHR 0x00000080
1 change: 0 additions & 1 deletion Source/Core/VideoBackends/Metal/MTLUtil.mm
Expand Up @@ -386,7 +386,6 @@ fragment float4 fbfetch_test(float4 in [[color(0), raster_order_group(0)]]) {
#extension GL_KHR_shader_subgroup_ballot : enable
#define SUPPORTS_SUBGROUP_REDUCTION 1
#define CAN_USE_SUBGROUP_REDUCTION true
#define IS_HELPER_INVOCATION gl_HelperInvocation
#define IS_FIRST_ACTIVE_INVOCATION (subgroupElect())
#define SUBGROUP_MIN(value) value = subgroupMin(value)
Expand Down
27 changes: 24 additions & 3 deletions Source/Core/VideoBackends/OGL/OGLRender.cpp
Expand Up @@ -627,7 +627,14 @@ Renderer::Renderer(std::unique_ptr<GLContext> main_gl_context, float backbuffer_
else if (GLExtensions::Version() >= 430)
{
// TODO: We should really parse the GL_SHADING_LANGUAGE_VERSION token.
g_ogl_config.eSupportedGLSLVersion = Glsl430;
if (GLExtensions::Version() >= 450)
{
g_ogl_config.eSupportedGLSLVersion = Glsl450;
}
else
{
g_ogl_config.eSupportedGLSLVersion = Glsl430;
}
g_ogl_config.bSupportsTextureStorage = true;
g_ogl_config.bSupportsImageLoadStore = true;
g_Config.backend_info.bSupportsSSAA = true;
Expand Down Expand Up @@ -669,8 +676,22 @@ Renderer::Renderer(std::unique_ptr<GLContext> main_gl_context, float backbuffer_
if (g_ogl_config.max_samples < 1 || !g_ogl_config.bSupportsMSAA)
g_ogl_config.max_samples = 1;

g_ogl_config.bSupportsShaderThreadShuffleNV =
GLExtensions::Supports("GL_NV_shader_thread_shuffle");
g_ogl_config.bSupportsKHRShaderSubgroup = GLExtensions::Supports("GL_KHR_shader_subgroup") &&
((g_ogl_config.eSupportedGLSLVersion >= Glsl450 &&
g_ogl_config.eSupportedGLSLVersion < GlslEs300) ||
g_ogl_config.eSupportedGLSLVersion >= GlslEs320);
if (g_ogl_config.bSupportsKHRShaderSubgroup)
{
// Check for the features: basic + arithmetic + ballot
GLint supported_features = 0;
glGetIntegerv(GL_SUBGROUP_SUPPORTED_FEATURES_KHR, &supported_features);
if (~supported_features &
(GL_SUBGROUP_FEATURE_BASIC_BIT_KHR | GL_SUBGROUP_FEATURE_ARITHMETIC_BIT_KHR |
GL_SUBGROUP_FEATURE_BALLOT_BIT_KHR))
{
g_ogl_config.bSupportsKHRShaderSubgroup = false;
}
}

// We require texel buffers, image load store, and compute shaders to enable GPU texture decoding.
// If the driver doesn't expose the extensions, but supports GL4.3/GLES3.1, it will still be
Expand Down
7 changes: 4 additions & 3 deletions Source/Core/VideoBackends/OGL/OGLRender.h
Expand Up @@ -25,8 +25,9 @@ enum GlslVersion
Glsl140,
Glsl150,
Glsl330,
Glsl400, // and above
Glsl430,
Glsl400, // 400 - 420
Glsl430, // 430 - 440
Glsl450, // 450 - xxx
GlslEs300, // GLES 3.0
GlslEs310, // GLES 3.1
GlslEs320, // GLES 3.2
Expand Down Expand Up @@ -71,7 +72,7 @@ struct VideoConfig
bool bSupportsBitfield;
bool bSupportsTextureSubImage;
EsFbFetchType SupportedFramebufferFetch;
bool bSupportsShaderThreadShuffleNV;
bool bSupportsKHRShaderSubgroup; // basic + arithmetic + ballot

const char* gl_vendor;
const char* gl_renderer;
Expand Down
27 changes: 11 additions & 16 deletions Source/Core/VideoBackends/OGL/ProgramShaderCache.cpp
Expand Up @@ -77,6 +77,8 @@ static std::string GetGLSLVersionString()
return "#version 400";
case Glsl430:
return "#version 430";
case Glsl450:
return "#version 450";
default:
// Shouldn't ever hit this
return "#version ERROR";
Expand Down Expand Up @@ -719,25 +721,18 @@ void ProgramShaderCache::CreateHeader()
}

std::string shader_shuffle_string;
if (g_ogl_config.bSupportsShaderThreadShuffleNV)
if (g_ogl_config.bSupportsKHRShaderSubgroup)
{
shader_shuffle_string = R"(
#extension GL_NV_shader_thread_group : enable
#extension GL_NV_shader_thread_shuffle : enable
#define SUPPORTS_SUBGROUP_REDUCTION 1
#extension GL_KHR_shader_subgroup_basic : enable
#extension GL_KHR_shader_subgroup_arithmetic : enable
#extension GL_KHR_shader_subgroup_ballot : enable
// The xor shuffle below produces incorrect results if all threads in a warp are not active.
#define CAN_USE_SUBGROUP_REDUCTION (ballotThreadNV(true) == 0xFFFFFFFFu)
#define IS_HELPER_INVOCATION gl_HelperThreadNV
#define IS_FIRST_ACTIVE_INVOCATION (gl_ThreadInWarpNV == findLSB(ballotThreadNV(!gl_HelperThreadNV)))
#define SUBGROUP_REDUCTION(func, value) value = func(value, shuffleXorNV(value, 16, 32)); \
value = func(value, shuffleXorNV(value, 8, 32)); \
value = func(value, shuffleXorNV(value, 4, 32)); \
value = func(value, shuffleXorNV(value, 2, 32)); \
value = func(value, shuffleXorNV(value, 1, 32));
#define SUBGROUP_MIN(value) SUBGROUP_REDUCTION(min, value)
#define SUBGROUP_MAX(value) SUBGROUP_REDUCTION(max, value)
#define SUPPORTS_SUBGROUP_REDUCTION 1
#define IS_HELPER_INVOCATION gl_HelperInvocation
#define IS_FIRST_ACTIVE_INVOCATION (subgroupElect())
#define SUBGROUP_MIN(value) value = subgroupMin(value)
#define SUBGROUP_MAX(value) value = subgroupMax(value)
)";
}

Expand Down
3 changes: 1 addition & 2 deletions Source/Core/VideoBackends/Vulkan/ShaderCompiler.cpp
Expand Up @@ -81,9 +81,8 @@ static const char SUBGROUP_HELPER_HEADER[] = R"(
#extension GL_KHR_shader_subgroup_ballot : enable
#define SUPPORTS_SUBGROUP_REDUCTION 1
#define CAN_USE_SUBGROUP_REDUCTION true
#define IS_HELPER_INVOCATION gl_HelperInvocation
#define IS_FIRST_ACTIVE_INVOCATION (gl_SubgroupInvocationID == subgroupBallotFindLSB(subgroupBallot(!gl_HelperInvocation)))
#define IS_FIRST_ACTIVE_INVOCATION (subgroupElect())
#define SUBGROUP_MIN(value) value = subgroupMin(value)
#define SUBGROUP_MAX(value) value = subgroupMax(value)
)";
Expand Down
13 changes: 5 additions & 8 deletions Source/Core/VideoCommon/PixelShaderGen.cpp
Expand Up @@ -458,15 +458,12 @@ void UpdateBoundingBox(float2 rawpos) {{
int2 pos_br = pos | 1; // round up to odd
#ifdef SUPPORTS_SUBGROUP_REDUCTION
if (CAN_USE_SUBGROUP_REDUCTION) {{
int2 min_pos = IS_HELPER_INVOCATION ? int2(2147483647, 2147483647) : pos_tl;
int2 max_pos = IS_HELPER_INVOCATION ? int2(-2147483648, -2147483648) : pos_br;
SUBGROUP_MIN(min_pos);
SUBGROUP_MAX(max_pos);
if (!IS_HELPER_INVOCATION)
{{
SUBGROUP_MIN(pos_tl);
SUBGROUP_MAX(pos_br);
if (IS_FIRST_ACTIVE_INVOCATION)
UpdateBoundingBoxBuffer(min_pos, max_pos);
}} else {{
UpdateBoundingBoxBuffer(pos_tl, pos_br);
UpdateBoundingBoxBuffer(pos_tl, pos_br);
}}
#else
UpdateBoundingBoxBuffer(pos_tl, pos_br);
Expand Down

0 comments on commit de4987a

Please sign in to comment.