Skip to content

Commit

Permalink
Merge pull request #11523 from degasus/OGL_KHR_subgroup
Browse files Browse the repository at this point in the history
VideoBackend/OGL: Prefer KHR_shader_subgroup over NV_shader_thread.
  • Loading branch information
JMC47 committed Feb 10, 2023
2 parents dad7a32 + 4b2aa94 commit 258151f
Show file tree
Hide file tree
Showing 8 changed files with 66 additions and 33 deletions.
1 change: 1 addition & 0 deletions Source/Core/Common/GL/GLExtensions/GLExtensions.h
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@
#include "Common/GL/GLExtensions/EXT_texture_filter_anisotropic.h"
#include "Common/GL/GLExtensions/HP_occlusion_test.h"
#include "Common/GL/GLExtensions/KHR_debug.h"
#include "Common/GL/GLExtensions/KHR_shader_subgroup.h"
#include "Common/GL/GLExtensions/NV_depth_buffer_float.h"
#include "Common/GL/GLExtensions/NV_occlusion_query_samples.h"
#include "Common/GL/GLExtensions/NV_primitive_restart.h"
Expand Down
19 changes: 19 additions & 0 deletions Source/Core/Common/GL/GLExtensions/KHR_shader_subgroup.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
/*
** Copyright (c) 2013-2015 The Khronos Group Inc.
** SPDX-License-Identifier: MIT
*/

#include "Common/GL/GLExtensions/gl_common.h"

// Enum tokens for the GL_KHR_shader_subgroup OpenGL extension.

// Implementation-state query tokens. GL_SUBGROUP_SUPPORTED_FEATURES_KHR is
// queried with glGetIntegerv and yields a bitmask of the
// GL_SUBGROUP_FEATURE_*_BIT_KHR flags defined below.
// NOTE(review): the remaining three are presumably queried the same way
// (size, supported shader stages, quad-op support) - confirm against the
// extension specification before relying on them.
#define GL_SUBGROUP_SIZE_KHR 0x9532
#define GL_SUBGROUP_SUPPORTED_STAGES_KHR 0x9533
#define GL_SUBGROUP_SUPPORTED_FEATURES_KHR 0x9534
#define GL_SUBGROUP_QUAD_ALL_STAGES_KHR 0x9535

// Feature flags: one distinct bit each, so they can be OR-ed together into a
// mask and tested against the GL_SUBGROUP_SUPPORTED_FEATURES_KHR result.
#define GL_SUBGROUP_FEATURE_BASIC_BIT_KHR 0x00000001
#define GL_SUBGROUP_FEATURE_VOTE_BIT_KHR 0x00000002
#define GL_SUBGROUP_FEATURE_ARITHMETIC_BIT_KHR 0x00000004
#define GL_SUBGROUP_FEATURE_BALLOT_BIT_KHR 0x00000008
#define GL_SUBGROUP_FEATURE_SHUFFLE_BIT_KHR 0x00000010
#define GL_SUBGROUP_FEATURE_SHUFFLE_RELATIVE_BIT_KHR 0x00000020
#define GL_SUBGROUP_FEATURE_CLUSTERED_BIT_KHR 0x00000040
#define GL_SUBGROUP_FEATURE_QUAD_BIT_KHR 0x00000080
1 change: 0 additions & 1 deletion Source/Core/VideoBackends/Metal/MTLUtil.mm
Original file line number Diff line number Diff line change
Expand Up @@ -386,7 +386,6 @@ fragment float4 fbfetch_test(float4 in [[color(0), raster_order_group(0)]]) {
#extension GL_KHR_shader_subgroup_ballot : enable
#define SUPPORTS_SUBGROUP_REDUCTION 1
#define CAN_USE_SUBGROUP_REDUCTION true
#define IS_HELPER_INVOCATION gl_HelperInvocation
#define IS_FIRST_ACTIVE_INVOCATION (subgroupElect())
#define SUBGROUP_MIN(value) value = subgroupMin(value)
Expand Down
28 changes: 25 additions & 3 deletions Source/Core/VideoBackends/OGL/OGLConfig.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -489,7 +489,14 @@ bool PopulateConfig(GLContext* m_main_gl_context)
else if (GLExtensions::Version() >= 430)
{
// TODO: We should really parse the GL_SHADING_LANGUAGE_VERSION token.
g_ogl_config.eSupportedGLSLVersion = Glsl430;
if (GLExtensions::Version() >= 450)
{
g_ogl_config.eSupportedGLSLVersion = Glsl450;
}
else
{
g_ogl_config.eSupportedGLSLVersion = Glsl430;
}
g_ogl_config.bSupportsTextureStorage = true;
g_ogl_config.bSupportsImageLoadStore = true;
g_Config.backend_info.bSupportsSSAA = true;
Expand Down Expand Up @@ -531,8 +538,23 @@ bool PopulateConfig(GLContext* m_main_gl_context)
if (g_ogl_config.max_samples < 1 || !g_ogl_config.bSupportsMSAA)
g_ogl_config.max_samples = 1;

g_ogl_config.bSupportsShaderThreadShuffleNV =
GLExtensions::Supports("GL_NV_shader_thread_shuffle");
const bool bSupportsIsHelperInvocation = g_ogl_config.bIsES ?
g_ogl_config.eSupportedGLSLVersion >= GlslEs320 :
g_ogl_config.eSupportedGLSLVersion >= Glsl450;
g_ogl_config.bSupportsKHRShaderSubgroup =
GLExtensions::Supports("GL_KHR_shader_subgroup") && bSupportsIsHelperInvocation;
if (g_ogl_config.bSupportsKHRShaderSubgroup)
{
// Check for the features: basic + arithmetic + ballot
GLint supported_features = 0;
glGetIntegerv(GL_SUBGROUP_SUPPORTED_FEATURES_KHR, &supported_features);
if (~supported_features &
(GL_SUBGROUP_FEATURE_BASIC_BIT_KHR | GL_SUBGROUP_FEATURE_ARITHMETIC_BIT_KHR |
GL_SUBGROUP_FEATURE_BALLOT_BIT_KHR))
{
g_ogl_config.bSupportsKHRShaderSubgroup = false;
}
}

// We require texel buffers, image load store, and compute shaders to enable GPU texture decoding.
// If the driver doesn't expose the extensions, but supports GL4.3/GLES3.1, it will still be
Expand Down
7 changes: 4 additions & 3 deletions Source/Core/VideoBackends/OGL/OGLConfig.h
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,9 @@ enum GlslVersion
Glsl140,
Glsl150,
Glsl330,
Glsl400, // and above
Glsl430,
Glsl400, // and above
Glsl430, // 430 - 440
Glsl450, // 450 - xxx
GlslEs300, // GLES 3.0
GlslEs310, // GLES 3.1
GlslEs320, // GLES 3.2
Expand Down Expand Up @@ -61,7 +62,7 @@ struct VideoConfig
bool bSupportsBitfield;
bool bSupportsTextureSubImage;
EsFbFetchType SupportedFramebufferFetch;
bool bSupportsShaderThreadShuffleNV;
bool bSupportsKHRShaderSubgroup; // basic + arithmetic + ballot

const char* gl_vendor;
const char* gl_renderer;
Expand Down
27 changes: 11 additions & 16 deletions Source/Core/VideoBackends/OGL/ProgramShaderCache.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,8 @@ static std::string GetGLSLVersionString()
return "#version 400";
case Glsl430:
return "#version 430";
case Glsl450:
return "#version 450";
default:
// Shouldn't ever hit this
return "#version ERROR";
Expand Down Expand Up @@ -720,25 +722,18 @@ void ProgramShaderCache::CreateHeader()
}

std::string shader_shuffle_string;
if (g_ogl_config.bSupportsShaderThreadShuffleNV)
if (g_ogl_config.bSupportsKHRShaderSubgroup)
{
shader_shuffle_string = R"(
#extension GL_NV_shader_thread_group : enable
#extension GL_NV_shader_thread_shuffle : enable
#define SUPPORTS_SUBGROUP_REDUCTION 1
#extension GL_KHR_shader_subgroup_basic : enable
#extension GL_KHR_shader_subgroup_arithmetic : enable
#extension GL_KHR_shader_subgroup_ballot : enable
// The xor shuffle below produces incorrect results if all threads in a warp are not active.
#define CAN_USE_SUBGROUP_REDUCTION (ballotThreadNV(true) == 0xFFFFFFFFu)
#define IS_HELPER_INVOCATION gl_HelperThreadNV
#define IS_FIRST_ACTIVE_INVOCATION (gl_ThreadInWarpNV == findLSB(ballotThreadNV(!gl_HelperThreadNV)))
#define SUBGROUP_REDUCTION(func, value) value = func(value, shuffleXorNV(value, 16, 32)); \
value = func(value, shuffleXorNV(value, 8, 32)); \
value = func(value, shuffleXorNV(value, 4, 32)); \
value = func(value, shuffleXorNV(value, 2, 32)); \
value = func(value, shuffleXorNV(value, 1, 32));
#define SUBGROUP_MIN(value) SUBGROUP_REDUCTION(min, value)
#define SUBGROUP_MAX(value) SUBGROUP_REDUCTION(max, value)
#define SUPPORTS_SUBGROUP_REDUCTION 1
#define IS_HELPER_INVOCATION gl_HelperInvocation
#define IS_FIRST_ACTIVE_INVOCATION (subgroupElect())
#define SUBGROUP_MIN(value) value = subgroupMin(value)
#define SUBGROUP_MAX(value) value = subgroupMax(value)
)";
}

Expand Down
3 changes: 1 addition & 2 deletions Source/Core/VideoBackends/Vulkan/ShaderCompiler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -81,9 +81,8 @@ static const char SUBGROUP_HELPER_HEADER[] = R"(
#extension GL_KHR_shader_subgroup_ballot : enable
#define SUPPORTS_SUBGROUP_REDUCTION 1
#define CAN_USE_SUBGROUP_REDUCTION true
#define IS_HELPER_INVOCATION gl_HelperInvocation
#define IS_FIRST_ACTIVE_INVOCATION (gl_SubgroupInvocationID == subgroupBallotFindLSB(subgroupBallot(!gl_HelperInvocation)))
#define IS_FIRST_ACTIVE_INVOCATION (subgroupElect())
#define SUBGROUP_MIN(value) value = subgroupMin(value)
#define SUBGROUP_MAX(value) value = subgroupMax(value)
)";
Expand Down
13 changes: 5 additions & 8 deletions Source/Core/VideoCommon/PixelShaderGen.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -457,15 +457,12 @@ void UpdateBoundingBox(float2 rawpos) {{
int2 pos_br = pos | 1; // round up to odd
#ifdef SUPPORTS_SUBGROUP_REDUCTION
if (CAN_USE_SUBGROUP_REDUCTION) {{
int2 min_pos = IS_HELPER_INVOCATION ? int2(2147483647, 2147483647) : pos_tl;
int2 max_pos = IS_HELPER_INVOCATION ? int2(-2147483648, -2147483648) : pos_br;
SUBGROUP_MIN(min_pos);
SUBGROUP_MAX(max_pos);
if (!IS_HELPER_INVOCATION)
{{
SUBGROUP_MIN(pos_tl);
SUBGROUP_MAX(pos_br);
if (IS_FIRST_ACTIVE_INVOCATION)
UpdateBoundingBoxBuffer(min_pos, max_pos);
}} else {{
UpdateBoundingBoxBuffer(pos_tl, pos_br);
UpdateBoundingBoxBuffer(pos_tl, pos_br);
}}
#else
UpdateBoundingBoxBuffer(pos_tl, pos_br);
Expand Down

0 comments on commit 258151f

Please sign in to comment.