Skip to content

Commit

Permalink
Merge pull request #16361 from unknownbrackets/depth-equal
Browse files Browse the repository at this point in the history
GPU: Avoid clears for non-simple depth values
  • Loading branch information
hrydgard authored Nov 9, 2022
2 parents e15efca + 745d9ad commit a853757
Show file tree
Hide file tree
Showing 14 changed files with 76 additions and 20 deletions.
17 changes: 17 additions & 0 deletions GPU/Common/DrawEngineCommon.h
Original file line number Diff line number Diff line change
Expand Up @@ -145,11 +145,28 @@ class DrawEngineCommon {
return 1;
}

// Records that a depth comparison sensitive to exact equality has been used.
// Only EQUAL, NOTEQUAL, LEQUAL and GEQUAL set the flag; this function never clears it.
inline void UpdateEverUsedEqualDepth(GEComparison comp) {
	const bool dependsOnEquality =
		comp == GE_COMP_EQUAL || comp == GE_COMP_NOTEQUAL ||
		comp == GE_COMP_LEQUAL || comp == GE_COMP_GEQUAL;
	if (dependsOnEquality)
		everUsedEqualDepth_ = true;
}

bool useHWTransform_ = false;
bool useHWTessellation_ = false;
// Used to prevent unnecessary flushing in softgpu.
bool flushOnParams_ = true;

// Set once a depth test that involves equality (EQUAL, NOTEQUAL, LEQUAL, GEQUAL) is encountered.
bool everUsedEqualDepth_ = false;

// Vertex collector buffers
u8 *decoded = nullptr;
u16 *decIndex = nullptr;
Expand Down
9 changes: 9 additions & 0 deletions GPU/Common/GPUStateUtils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -136,6 +136,15 @@ bool IsColorTestTriviallyTrue() {
}
}

// Reports whether the depth test has no effect in the current GE state: either it is
// switched off, or its function is ALWAYS and depth writes are off.
bool IsDepthTestEffectivelyDisabled() {
	if (gstate.isDepthTestEnabled()) {
		// Stencil can be ignored here, because ALWAYS and disabled choose the same stencil path.
		const bool alwaysPasses = gstate.getDepthTestFunction() == GE_COMP_ALWAYS;
		return alwaysPasses && !gstate.isDepthWriteEnabled();
	}
	return true;
}

const bool nonAlphaSrcFactors[16] = {
true, // GE_SRCBLEND_DSTCOLOR,
true, // GE_SRCBLEND_INVDSTCOLOR,
Expand Down
1 change: 1 addition & 0 deletions GPU/Common/GPUStateUtils.h
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,7 @@ bool IsColorTestAgainstZero();
bool IsColorTestTriviallyTrue();
bool IsAlphaTestAgainstZero();
bool NeedsTestDiscard();
bool IsDepthTestEffectivelyDisabled();
bool IsStencilTestOutputDisabled();

StencilValueType ReplaceAlphaWithStencilType();
Expand Down
4 changes: 4 additions & 0 deletions GPU/D3D11/DrawEngineD3D11.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -643,6 +643,10 @@ void DrawEngineD3D11::DoFlush() {
swTransform.BuildDrawingParams(prim, indexGen.VertexCount(), dec_->VertexType(), inds, maxIndex, &result);
if (result.setSafeSize)
framebufferManager_->SetSafeSize(result.safeWidth, result.safeHeight);
// Non-zero depth clears are unusual, but some drivers don't match drawn depth values to cleared values.
// Games sometimes expect exact matches (see #12626, for example) for equal comparisons.
if (result.action == SW_CLEAR && everUsedEqualDepth_ && gstate.isClearModeDepthMask() && result.depth > 0.0f && result.depth < 1.0f)
result.action = SW_DRAW_PRIMITIVES;

ApplyDrawStateLate(result.setStencil, result.stencilValue);

Expand Down
3 changes: 1 addition & 2 deletions GPU/D3D11/GPU_D3D11.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -125,8 +125,7 @@ u32 GPU_D3D11::CheckGPUFeatures() const {
if (!g_Config.bHighQualityDepth && (features & GPU_USE_ACCURATE_DEPTH) != 0) {
features |= GPU_SCALE_DEPTH_FROM_24BIT_TO_16BIT;
} else if (PSP_CoreParameter().compat.flags().PixelDepthRounding) {
// Use fragment rounding on desktop and GLES3, most accurate.
features |= GPU_ROUND_FRAGMENT_DEPTH_TO_16BIT;
features |= GPU_SCALE_DEPTH_FROM_24BIT_TO_16BIT;
} else if (PSP_CoreParameter().compat.flags().VertexDepthRounding) {
features |= GPU_ROUND_DEPTH_TO_16BIT;
}
Expand Down
3 changes: 2 additions & 1 deletion GPU/D3D11/StateMappingD3D11.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -270,10 +270,11 @@ void DrawEngineD3D11::ApplyDrawState(int prim) {
} else {
keys_.depthStencil.value = 0;
// Depth Test
if (gstate.isDepthTestEnabled()) {
if (!IsDepthTestEffectivelyDisabled()) {
keys_.depthStencil.depthTestEnable = true;
keys_.depthStencil.depthCompareOp = compareOps[gstate.getDepthTestFunction()];
keys_.depthStencil.depthWriteEnable = gstate.isDepthWriteEnabled();
UpdateEverUsedEqualDepth(gstate.getDepthTestFunction());
} else {
keys_.depthStencil.depthTestEnable = false;
keys_.depthStencil.depthWriteEnable = false;
Expand Down
4 changes: 4 additions & 0 deletions GPU/Directx9/DrawEngineDX9.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -611,6 +611,10 @@ void DrawEngineDX9::DoFlush() {
swTransform.BuildDrawingParams(prim, indexGen.VertexCount(), dec_->VertexType(), inds, maxIndex, &result);
if (result.setSafeSize)
framebufferManager_->SetSafeSize(result.safeWidth, result.safeHeight);
// Non-zero depth clears are unusual, but some drivers don't match drawn depth values to cleared values.
// Games sometimes expect exact matches (see #12626, for example) for equal comparisons.
if (result.action == SW_CLEAR && everUsedEqualDepth_ && gstate.isClearModeDepthMask() && result.depth > 0.0f && result.depth < 1.0f)
result.action = SW_DRAW_PRIMITIVES;

ApplyDrawStateLate();

Expand Down
3 changes: 2 additions & 1 deletion GPU/Directx9/StateMappingDX9.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -228,10 +228,11 @@ void DrawEngineDX9::ApplyDrawState(int prim) {
}
} else {
// Depth Test
if (gstate.isDepthTestEnabled()) {
if (!IsDepthTestEffectivelyDisabled()) {
dxstate.depthTest.enable();
dxstate.depthFunc.set(ztests[gstate.getDepthTestFunction()]);
dxstate.depthWrite.set(gstate.isDepthWriteEnabled());
UpdateEverUsedEqualDepth(gstate.getDepthTestFunction());
} else {
dxstate.depthTest.disable();
}
Expand Down
4 changes: 4 additions & 0 deletions GPU/GLES/DrawEngineGLES.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -404,6 +404,10 @@ void DrawEngineGLES::DoFlush() {
swTransform.BuildDrawingParams(prim, vertexCount, dec_->VertexType(), inds, maxIndex, &result);
if (result.setSafeSize)
framebufferManager_->SetSafeSize(result.safeWidth, result.safeHeight);
// Non-zero depth clears are unusual, but some drivers don't match drawn depth values to cleared values.
// Games sometimes expect exact matches (see #12626, for example) for equal comparisons.
if (result.action == SW_CLEAR && everUsedEqualDepth_ && gstate.isClearModeDepthMask() && result.depth > 0.0f && result.depth < 1.0f)
result.action = SW_DRAW_PRIMITIVES;

ApplyDrawStateLate(result.setStencil, result.stencilValue);

Expand Down
11 changes: 6 additions & 5 deletions GPU/GLES/GPU_GLES.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -181,17 +181,18 @@ u32 GPU_GLES::CheckGPUFeatures() const {

// If we already have a 16-bit depth buffer, we don't need to round.
bool prefer24 = draw_->GetDeviceCaps().preferredDepthBufferFormat == Draw::DataFormat::D24_S8;
if (prefer24) {
bool prefer16 = draw_->GetDeviceCaps().preferredDepthBufferFormat == Draw::DataFormat::D16;
if (!prefer16) {
if (!g_Config.bHighQualityDepth && (features & GPU_USE_ACCURATE_DEPTH) != 0) {
features |= GPU_SCALE_DEPTH_FROM_24BIT_TO_16BIT;
} else if (PSP_CoreParameter().compat.flags().PixelDepthRounding) {
if (!gl_extensions.IsGLES || gl_extensions.GLES3) {
// Use fragment rounding on desktop and GLES3, most accurate.
features |= GPU_ROUND_FRAGMENT_DEPTH_TO_16BIT;
} else if (prefer24 && (features & GPU_USE_ACCURATE_DEPTH) != 0) {
if (prefer24 && (features & GPU_USE_ACCURATE_DEPTH) != 0) {
// Here we can simulate a 16 bit depth buffer by scaling.
// Note that the depth buffer is fixed point, not floating, so dividing by 256 is pretty good.
features |= GPU_SCALE_DEPTH_FROM_24BIT_TO_16BIT;
} else if (!gl_extensions.IsGLES || gl_extensions.GLES3) {
// Use fragment rounding on desktop and GLES3, most accurate.
features |= GPU_ROUND_FRAGMENT_DEPTH_TO_16BIT;
} else {
// At least do vertex rounding if nothing else.
features |= GPU_ROUND_DEPTH_TO_16BIT;
Expand Down
5 changes: 4 additions & 1 deletion GPU/GLES/StateMappingGLES.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -253,7 +253,10 @@ void DrawEngineGLES::ApplyDrawState(int prim) {
renderManager->SetDepth(true, gstate.isClearModeDepthMask() ? true : false, GL_ALWAYS);
} else {
// Depth Test
renderManager->SetDepth(gstate.isDepthTestEnabled(), gstate.isDepthWriteEnabled(), compareOps[gstate.getDepthTestFunction()]);
bool depthTestUsed = !IsDepthTestEffectivelyDisabled();
renderManager->SetDepth(depthTestUsed, gstate.isDepthWriteEnabled(), compareOps[gstate.getDepthTestFunction()]);
if (depthTestUsed)
UpdateEverUsedEqualDepth(gstate.getDepthTestFunction());

// Stencil Test
if (stencilState.enabled) {
Expand Down
4 changes: 4 additions & 0 deletions GPU/Vulkan/DrawEngineVulkan.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -899,6 +899,10 @@ void DrawEngineVulkan::DoFlush() {

if (result.setSafeSize)
framebufferManager_->SetSafeSize(result.safeWidth, result.safeHeight);
// Non-zero depth clears are unusual, but some drivers don't match drawn depth values to cleared values.
// Games sometimes expect exact matches (see #12626, for example) for equal comparisons.
if (result.action == SW_CLEAR && everUsedEqualDepth_ && gstate.isClearModeDepthMask() && result.depth > 0.0f && result.depth < 1.0f)
result.action = SW_DRAW_PRIMITIVES;

// Only here, where we know whether to clear or to draw primitives, should we actually set the current framebuffer! Because that gives us the opportunity
// to use a "pre-clear" render pass, for high efficiency on tilers.
Expand Down
25 changes: 16 additions & 9 deletions GPU/Vulkan/GPU_Vulkan.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -257,15 +257,22 @@ u32 GPU_Vulkan::CheckGPUFeatures() const {
INFO_LOG(G3D, "Deficient texture format support: 4444: %d 1555: %d 565: %d", fmt4444, fmt1555, fmt565);
}

if (!g_Config.bHighQualityDepth && (features & GPU_USE_ACCURATE_DEPTH) != 0) {
features |= GPU_SCALE_DEPTH_FROM_24BIT_TO_16BIT;
}
else if (PSP_CoreParameter().compat.flags().PixelDepthRounding) {
// Use fragment rounding on desktop and GLES3, most accurate.
features |= GPU_ROUND_FRAGMENT_DEPTH_TO_16BIT;
}
else if (PSP_CoreParameter().compat.flags().VertexDepthRounding) {
features |= GPU_ROUND_DEPTH_TO_16BIT;
bool prefer24 = draw_->GetDeviceCaps().preferredDepthBufferFormat == Draw::DataFormat::D24_S8;
bool prefer16 = draw_->GetDeviceCaps().preferredDepthBufferFormat == Draw::DataFormat::D16;
if (!prefer16) {
if (!g_Config.bHighQualityDepth && (features & GPU_USE_ACCURATE_DEPTH) != 0) {
features |= GPU_SCALE_DEPTH_FROM_24BIT_TO_16BIT;
} else if (PSP_CoreParameter().compat.flags().PixelDepthRounding) {
if (prefer24 && (features & GPU_USE_ACCURATE_DEPTH) != 0) {
// Here we can simulate a 16 bit depth buffer by scaling.
// Note that the depth buffer is fixed point, not floating, so dividing by 256 is pretty good.
features |= GPU_SCALE_DEPTH_FROM_24BIT_TO_16BIT;
} else {
features |= GPU_ROUND_FRAGMENT_DEPTH_TO_16BIT;
}
} else if (PSP_CoreParameter().compat.flags().VertexDepthRounding) {
features |= GPU_ROUND_DEPTH_TO_16BIT;
}
}

if (g_Config.bStereoRendering && draw_->GetDeviceCaps().multiViewSupported) {
Expand Down
3 changes: 2 additions & 1 deletion GPU/Vulkan/StateMappingVulkan.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -274,10 +274,11 @@ void DrawEngineVulkan::ConvertStateToVulkanKey(FramebufferManagerVulkan &fbManag
}
} else {
// Depth Test
if (gstate.isDepthTestEnabled()) {
if (!IsDepthTestEffectivelyDisabled()) {
key.depthTestEnable = true;
key.depthCompareOp = compareOps[gstate.getDepthTestFunction()];
key.depthWriteEnable = gstate.isDepthWriteEnabled();
UpdateEverUsedEqualDepth(gstate.getDepthTestFunction());
} else {
key.depthTestEnable = false;
key.depthWriteEnable = false;
Expand Down

0 comments on commit a853757

Please sign in to comment.