Skip to content

Commit

Permalink
Merge pull request #16197 from hrydgard/more-uniform-optimization
Browse files: browse the repository at this point in the history
More uniform optimization, fixes
  • Loading branch information
hrydgard committed Oct 11, 2022
2 parents e179e46 + 089ac9a commit e0e29a1
Show file tree
Hide file tree
Showing 13 changed files with 35 additions and 26 deletions.
1 change: 1 addition & 0 deletions Common/GPU/Shader.cpp
Expand Up @@ -202,6 +202,7 @@ void init_resources(TBuiltInResource &Resources) {
Resources.maxCullDistances = 8;
Resources.maxCombinedClipAndCullDistances = 8;
Resources.maxSamples = 4;
Resources.maxDualSourceDrawBuffersEXT = 1;
Resources.limits.nonInductiveForLoops = 1;
Resources.limits.whileLoops = 1;
Resources.limits.doWhileLoops = 1;
Expand Down
10 changes: 7 additions & 3 deletions GPU/Common/FragmentShaderGenerator.cpp
Expand Up @@ -353,7 +353,11 @@ bool GenerateFragmentShader(const FShaderID &id, char *buffer, const ShaderLangu
WRITE(p, "uniform sampler2D testtex;\n");
} else {
*uniformMask |= DIRTY_ALPHACOLORREF;
WRITE(p, "uniform vec4 u_alphacolorref;\n");
if (compat.bitwiseOps) {
WRITE(p, "uniform uint u_alphacolorref;\n");
} else {
WRITE(p, "uniform vec4 u_alphacolorref;\n");
}
if (compat.bitwiseOps && ((enableColorTest && !colorTestAgainstZero) || (enableAlphaTest && !alphaTestAgainstZero))) {
*uniformMask |= DIRTY_ALPHACOLORMASK;
WRITE(p, "uniform uint u_alphacolormask;\n");
Expand Down Expand Up @@ -882,7 +886,7 @@ bool GenerateFragmentShader(const FShaderID &id, char *buffer, const ShaderLangu
const char *alphaTestFuncs[] = { "#", "#", " != ", " == ", " >= ", " > ", " <= ", " < " };
if (alphaTestFuncs[alphaTestFunc][0] != '#') {
if (compat.bitwiseOps) {
WRITE(p, " if ((roundAndScaleTo255i(v.a) & int(u_alphacolormask >> 24)) %s int(u_alphacolorref.a)) %s\n", alphaTestFuncs[alphaTestFunc], discardStatement);
WRITE(p, " if ((roundAndScaleTo255i(v.a) & int(u_alphacolormask >> 24)) %s int(u_alphacolorref >> 24)) %s\n", alphaTestFuncs[alphaTestFunc], discardStatement);
} else if (gl_extensions.gpuVendor == GPU_VENDOR_IMGTEC) {
// Work around bad PVR driver problem where equality check + discard just doesn't work.
if (alphaTestFunc != GE_COMP_NOTEQUAL) {
Expand Down Expand Up @@ -946,7 +950,7 @@ bool GenerateFragmentShader(const FShaderID &id, char *buffer, const ShaderLangu
} else if (compat.bitwiseOps) {
WRITE(p, " uint v_uint = roundAndScaleTo8x4(v.rgb);\n");
WRITE(p, " uint v_masked = v_uint & u_alphacolormask;\n");
WRITE(p, " uint colorTestRef = packFloatsTo8x4(u_alphacolorref.rgb) & u_alphacolormask;\n");
WRITE(p, " uint colorTestRef = (u_alphacolorref & u_alphacolormask) & 0xFFFFFFu;\n");
WRITE(p, " if (v_masked %s colorTestRef) %s\n", test, discardStatement);
} else if (gl_extensions.gpuVendor == GPU_VENDOR_IMGTEC) {
WRITE(p, " if (roundTo255thv(v.rgb) %s u_alphacolorref.rgb) %s\n", test, discardStatement);
Expand Down
4 changes: 2 additions & 2 deletions GPU/Common/ShaderUniforms.cpp
Expand Up @@ -77,13 +77,13 @@ void BaseUpdateUniforms(UB_VS_FS_Base *ub, uint64_t dirtyUniforms, bool flipView
Uint8x3ToFloat3(ub->texEnvColor, gstate.texenvcolor);
}
if (dirtyUniforms & DIRTY_ALPHACOLORREF) {
Uint8x3ToInt4_Alpha(ub->alphaColorRef, gstate.getColorTestRef(), gstate.getAlphaTestRef() & gstate.getAlphaTestMask());
ub->alphaColorRef = gstate.getColorTestRef() | ((gstate.getAlphaTestRef() & gstate.getAlphaTestMask()) << 24);
}
if (dirtyUniforms & DIRTY_ALPHACOLORMASK) {
ub->colorTestMask = gstate.getColorTestMask() | (gstate.getAlphaTestMask() << 24);
}
if (dirtyUniforms & DIRTY_FOGCOLOR) {
Uint8x3ToFloat4(ub->fogColor, gstate.fogcolor);
Uint8x3ToFloat3(ub->fogColor, gstate.fogcolor);
}
if (dirtyUniforms & DIRTY_SHADERBLEND) {
Uint8x3ToFloat3(ub->blendFixA, gstate.getFixA());
Expand Down
13 changes: 6 additions & 7 deletions GPU/Common/ShaderUniforms.h
Expand Up @@ -17,7 +17,7 @@ enum : uint64_t {
DIRTY_MATDIFFUSE | DIRTY_MATSPECULAR | DIRTY_MATEMISSIVE | DIRTY_AMBIENT,
};

// Currently 480 bytes. Probably can't get to 256 (nVidia's UBO alignment, also common in other vendors).
// Currently 448 bytes.
// Every line here is a 4-float.
struct alignas(16) UB_VS_FS_Base {
float proj[16];
Expand All @@ -34,13 +34,14 @@ struct alignas(16) UB_VS_FS_Base {
uint32_t spline_counts; uint32_t depal_mask_shift_off_fmt; // 4 params packed into one.
uint32_t colorWriteMask; float mipBias;
// Fragment data
float fogColor[4]; // .w is unused
float fogColor[3]; uint32_t alphaColorRef;
float texEnvColor[3]; uint32_t colorTestMask;
int alphaColorRef[4];
float blendFixA[3]; float stencil;
float blendFixB[3]; float rotation;
float texClamp[4];
float texClampOffset[2]; float fogCoef[2];
// VR stuff is to go here, later. For normal drawing, we can then get away
// with just uploading the first 448 bytes of the struct (up to and including fogCoef).
};

static const char * const ub_baseStr =
Expand All @@ -58,10 +59,8 @@ R"( mat4 u_proj;
uint u_depal_mask_shift_off_fmt;
uint u_colorWriteMask;
float u_mipBias;
vec3 u_fogcolor;
vec3 u_texenv;
uint u_alphacolormask;
ivec4 u_alphacolorref;
vec3 u_fogcolor; uint u_alphacolorref;
vec3 u_texenv; uint u_alphacolormask;
vec3 u_blendFixA; float u_stencilReplaceValue;
vec3 u_blendFixB; float u_rotation;
vec4 u_texclamp;
Expand Down
2 changes: 1 addition & 1 deletion GPU/Common/TextureCacheCommon.cpp
Expand Up @@ -2096,7 +2096,7 @@ void TextureCacheCommon::ApplyTextureFramebuffer(VirtualFramebuffer *framebuffer
bool useShaderDepal = framebufferManager_->GetCurrentRenderVFB() != framebuffer &&
!depth &&
!gstate_c.curTextureIs3D &&
draw_->GetDeviceCaps().fragmentShaderInt32Supported;
draw_->GetShaderLanguageDesc().bitwiseOps;

// TODO: Implement shader depal in the fragment shader generator for D3D11 at least.
switch (draw_->GetShaderLanguageDesc().shaderLanguage) {
Expand Down
2 changes: 1 addition & 1 deletion GPU/D3D11/StateMappingD3D11.cpp
Expand Up @@ -147,7 +147,7 @@ void DrawEngineD3D11::ApplyDrawState(int prim) {
} else {
keys_.blend.value = 0;

pipelineState_.Convert(draw_->GetDeviceCaps().fragmentShaderInt32Supported);
pipelineState_.Convert(draw_->GetShaderLanguageDesc().bitwiseOps);
GenericMaskState &maskState = pipelineState_.maskState;
GenericBlendState &blendState = pipelineState_.blendState;
// We ignore the logicState on D3D since there's no support, the emulation of it is blend-and-shader only.
Expand Down
2 changes: 1 addition & 1 deletion GPU/Directx9/StateMappingDX9.cpp
Expand Up @@ -127,7 +127,7 @@ void DrawEngineDX9::ApplyDrawState(int prim) {
}
dxstate.colorMask.set(mask);
} else {
pipelineState_.Convert(draw_->GetDeviceCaps().fragmentShaderInt32Supported);
pipelineState_.Convert(draw_->GetShaderLanguageDesc().bitwiseOps);
GenericMaskState &maskState = pipelineState_.maskState;
GenericBlendState &blendState = pipelineState_.blendState;
// We ignore the logicState on D3D since there's no support, the emulation of it is blend-and-shader only.
Expand Down
15 changes: 9 additions & 6 deletions GPU/GLES/ShaderManagerGLES.cpp
Expand Up @@ -359,7 +359,7 @@ void LinkedShader::use(const ShaderID &VSID) {
// Note that we no longer track attr masks here - we do it for the input layouts instead.
}

void LinkedShader::UpdateUniforms(u32 vertType, const ShaderID &vsid, bool useBufferedRendering) {
void LinkedShader::UpdateUniforms(u32 vertType, const ShaderID &vsid, bool useBufferedRendering, const ShaderLanguageDesc &shaderLanguage) {
u64 dirty = dirtyUniforms & availableUniforms;
dirtyUniforms = 0;

Expand Down Expand Up @@ -432,8 +432,7 @@ void LinkedShader::UpdateUniforms(u32 vertType, const ShaderID &vsid, bool useBu
render_->SetUniformM4x4(&u_proj, flippedMatrix.m);
render_->SetUniformF1(&u_rotation, useBufferedRendering ? 0 : (float)g_display_rotation);
}
if (dirty & DIRTY_PROJTHROUGHMATRIX)
{
if (dirty & DIRTY_PROJTHROUGHMATRIX) {
Matrix4x4 proj_through;
if (useBufferedRendering) {
proj_through.setOrtho(0.0f, gstate_c.curRTWidth, 0.0f, gstate_c.curRTHeight, 0.0f, 1.0f);
Expand All @@ -446,7 +445,11 @@ void LinkedShader::UpdateUniforms(u32 vertType, const ShaderID &vsid, bool useBu
SetColorUniform3(render_, &u_texenv, gstate.texenvcolor);
}
if (dirty & DIRTY_ALPHACOLORREF) {
SetColorUniform3Alpha255(render_, &u_alphacolorref, gstate.getColorTestRef(), gstate.getAlphaTestRef() & gstate.getAlphaTestMask());
if (shaderLanguage.bitwiseOps) {
render_->SetUniformUI1(&u_alphacolorref, gstate.getColorTestRef() | ((gstate.getAlphaTestRef() & gstate.getAlphaTestMask()) << 24));
} else {
SetColorUniform3Alpha255(render_, &u_alphacolorref, gstate.getColorTestRef(), gstate.getAlphaTestRef() & gstate.getAlphaTestMask());
}
}
if (dirty & DIRTY_ALPHACOLORMASK) {
render_->SetUniformUI1(&u_alphacolormask, gstate.getColorTestMask() | (gstate.getAlphaTestMask() << 24));
Expand Down Expand Up @@ -813,7 +816,7 @@ LinkedShader *ShaderManagerGLES::ApplyFragmentShader(VShaderID VSID, Shader *vs,
}

if (lastVShaderSame_ && FSID == lastFSID_) {
lastShader_->UpdateUniforms(vertType, VSID, useBufferedRendering);
lastShader_->UpdateUniforms(vertType, VSID, useBufferedRendering, draw_->GetShaderLanguageDesc());
return lastShader_;
}

Expand Down Expand Up @@ -856,7 +859,7 @@ LinkedShader *ShaderManagerGLES::ApplyFragmentShader(VShaderID VSID, Shader *vs,
} else {
ls->use(VSID);
}
ls->UpdateUniforms(vertType, VSID, useBufferedRendering);
ls->UpdateUniforms(vertType, VSID, useBufferedRendering, draw_->GetShaderLanguageDesc());

lastShader_ = ls;
return ls;
Expand Down
3 changes: 2 additions & 1 deletion GPU/GLES/ShaderManagerGLES.h
Expand Up @@ -28,14 +28,15 @@
#include "GPU/Common/FragmentShaderGenerator.h"

class Shader;
struct ShaderLanguageDesc;

class LinkedShader {
public:
LinkedShader(GLRenderManager *render, VShaderID VSID, Shader *vs, FShaderID FSID, Shader *fs, bool useHWTransform, bool preloading = false);
~LinkedShader();

void use(const ShaderID &VSID);
void UpdateUniforms(u32 vertType, const ShaderID &VSID, bool useBufferedRendering);
void UpdateUniforms(u32 vertType, const ShaderID &VSID, bool useBufferedRendering, const ShaderLanguageDesc &shaderLanguage);

GLRenderManager *render_;
Shader *vs_;
Expand Down
2 changes: 1 addition & 1 deletion GPU/GLES/StateMappingGLES.cpp
Expand Up @@ -143,7 +143,7 @@ void DrawEngineGLES::ApplyDrawState(int prim) {
bool alphaMask = gstate.isClearModeAlphaMask();
renderManager->SetNoBlendAndMask((colorMask ? 7 : 0) | (alphaMask ? 8 : 0));
} else {
pipelineState_.Convert(draw_->GetDeviceCaps().fragmentShaderInt32Supported);
pipelineState_.Convert(draw_->GetShaderLanguageDesc().bitwiseOps);
GenericMaskState &maskState = pipelineState_.maskState;
GenericBlendState &blendState = pipelineState_.blendState;
GenericLogicState &logicState = pipelineState_.logicState;
Expand Down
2 changes: 1 addition & 1 deletion GPU/GPUCommon.cpp
Expand Up @@ -3325,7 +3325,7 @@ u32 GPUCommon::CheckGPUFeatures() const {
features |= GPU_SUPPORTS_ANY_FRAMEBUFFER_FETCH;
}

if (draw_->GetDeviceCaps().fragmentShaderInt32Supported) {
if (draw_->GetShaderLanguageDesc().bitwiseOps) {
features |= GPU_USE_LIGHT_UBERSHADER;
}

Expand Down
2 changes: 1 addition & 1 deletion GPU/Vulkan/StateMappingVulkan.cpp
Expand Up @@ -147,7 +147,7 @@ void DrawEngineVulkan::ConvertStateToVulkanKey(FramebufferManagerVulkan &fbManag
bool alphaMask = gstate.isClearModeAlphaMask();
key.colorWriteMask = (colorMask ? (VK_COLOR_COMPONENT_R_BIT | VK_COLOR_COMPONENT_G_BIT | VK_COLOR_COMPONENT_B_BIT) : 0) | (alphaMask ? VK_COLOR_COMPONENT_A_BIT : 0);
} else {
pipelineState_.Convert(draw_->GetDeviceCaps().fragmentShaderInt32Supported);
pipelineState_.Convert(draw_->GetShaderLanguageDesc().bitwiseOps);
GenericMaskState &maskState = pipelineState_.maskState;
GenericBlendState &blendState = pipelineState_.blendState;
GenericLogicState &logicState = pipelineState_.logicState;
Expand Down
3 changes: 2 additions & 1 deletion unittest/TestShaderGenerators.cpp
Expand Up @@ -560,6 +560,7 @@ bool TestGeometryShaders() {
std::string genErrorString[numLanguages];

for (int j = 0; j < numLanguages; j++) {
buffer[j][0] = 0;
generateSuccess[j] = GenerateGShader(id, buffer[j], languages[j], bugs, &genErrorString[j]);
if (!genErrorString[j].empty()) {
printf("%s\n", genErrorString[j].c_str());
Expand All @@ -569,7 +570,7 @@ bool TestGeometryShaders() {

for (int j = 0; j < numLanguages; j++) {
if (strlen(buffer[j]) >= CODE_BUFFER_SIZE) {
printf("Geoemtry shader exceeded buffer:\n\n%s\n", LineNumberString(buffer[j]).c_str());
printf("Geometry shader exceeded buffer:\n\n%s\n", LineNumberString(buffer[j]).c_str());
for (int i = 0; i < numLanguages; i++) {
delete[] buffer[i];
}
Expand Down

0 comments on commit e0e29a1

Please sign in to comment.