Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add support for binding the depth buffer as a color target. Fixes Kurohyo depth sorting #15772

Merged
merged 9 commits into from
Aug 1, 2022
2 changes: 1 addition & 1 deletion Common/GPU/Shader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,7 @@ void ShaderLanguageDesc::Init(ShaderLanguage lang) {
fragColor0 = "outfragment.target";
fragColor1 = "outfragment.target1";
} else {
fragColor0 = "target";
fragColor0 = "outfragment.target";
}
varying_fs = "in";
varying_vs = "out";
Expand Down
3 changes: 2 additions & 1 deletion GPU/Common/DepalettizeShaderCommon.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -145,8 +145,9 @@ void GenerateDepalShader300(char *buffer, GEBufferFormat pixelFormat, ShaderLang
}

float texturePixels = 256;
if (clutFormat != GE_CMODE_32BIT_ABGR8888)
if (clutFormat != GE_CMODE_32BIT_ABGR8888) {
texturePixels = 512;
}

if (shift) {
WRITE(p, " index = (int(uint(index) >> uint(%i)) & 0x%02x)", shift, mask);
Expand Down
49 changes: 41 additions & 8 deletions GPU/Common/FragmentShaderGenerator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,7 @@ bool GenerateFragmentShader(const FShaderID &id, char *buffer, const ShaderLangu
bool shaderDepal = id.Bit(FS_BIT_SHADER_DEPAL) && !texture3D; // combination with texture3D not supported. Enforced elsewhere too.
bool bgraTexture = id.Bit(FS_BIT_BGRA_TEXTURE);
bool colorWriteMask = id.Bit(FS_BIT_COLOR_WRITEMASK) && compat.bitwiseOps;
bool colorToDepth = id.Bit(FS_BIT_COLOR_TO_DEPTH);

GEComparison alphaTestFunc = (GEComparison)id.Bits(FS_BIT_ALPHA_TEST_FUNC, 3);
GEComparison colorTestFunc = (GEComparison)id.Bits(FS_BIT_COLOR_TEST_FUNC, 2);
Expand Down Expand Up @@ -122,7 +123,7 @@ bool GenerateFragmentShader(const FShaderID &id, char *buffer, const ShaderLangu
bool readFramebufferTex = readFramebuffer && !gstate_c.Supports(GPU_SUPPORTS_ANY_FRAMEBUFFER_FETCH);

bool needFragCoord = readFramebuffer || gstate_c.Supports(GPU_ROUND_FRAGMENT_DEPTH_TO_16BIT);
bool writeDepth = gstate_c.Supports(GPU_ROUND_FRAGMENT_DEPTH_TO_16BIT);
bool writeDepth = gstate_c.Supports(GPU_ROUND_FRAGMENT_DEPTH_TO_16BIT) || colorToDepth;

if (shaderDepal && !doTexture) {
*errorString = "depal requires a texture";
Expand All @@ -135,6 +136,11 @@ bool GenerateFragmentShader(const FShaderID &id, char *buffer, const ShaderLangu
}

if (compat.shaderLanguage == ShaderLanguage::GLSL_VULKAN) {
if (colorToDepth) {
WRITE(p, "precision highp int;\n");
WRITE(p, "precision highp float;\n");
}

if (useDiscardStencilBugWorkaround && !gstate_c.Supports(GPU_ROUND_FRAGMENT_DEPTH_TO_16BIT)) {
WRITE(p, "layout (depth_unchanged) out float gl_FragDepth;\n");
}
Expand Down Expand Up @@ -274,9 +280,16 @@ bool GenerateFragmentShader(const FShaderID &id, char *buffer, const ShaderLangu
WRITE(p, " float depth : SV_Depth;\n");
}
WRITE(p, "};\n");
} else if (compat.shaderLanguage == HLSL_D3D9) {
WRITE(p, "struct PS_OUT {\n");
WRITE(p, " vec4 target : COLOR;\n");
if (writeDepth) {
WRITE(p, " float depth : DEPTH;\n");
}
WRITE(p, "};\n");
}
} else if (ShaderLanguageIsOpenGL(compat.shaderLanguage)) {
if ((shaderDepal || colorWriteMask) && gl_extensions.IsGLES) {
if ((shaderDepal || colorWriteMask || colorToDepth) && gl_extensions.IsGLES) {
WRITE(p, "precision highp int;\n");
}

Expand Down Expand Up @@ -441,8 +454,12 @@ bool GenerateFragmentShader(const FShaderID &id, char *buffer, const ShaderLangu
WRITE(p, " float gl_FragDepth;\n");
}
} else if (compat.shaderLanguage == HLSL_D3D9) {
WRITE(p, "vec4 main( PS_IN In ) : COLOR {\n");
WRITE(p, "PS_OUT main( PS_IN In ) {\n");
WRITE(p, " PS_OUT outfragment;\n");
WRITE(p, " vec4 target;\n");
if (colorToDepth) {
WRITE(p, " float gl_FragDepth;\n");
}
} else {
WRITE(p, "void main() {\n");
}
Expand Down Expand Up @@ -1034,7 +1051,9 @@ bool GenerateFragmentShader(const FShaderID &id, char *buffer, const ShaderLangu

// Final color computed - apply color write mask.
// TODO: Maybe optimize to only do math on the affected channels?
// Or .. meh.
// Or .. meh. That would require more shader bits. Though we could
// of course optimize for the common mask 0xF00000, though again, blue-to-alpha
// does a better job with that.
if (colorWriteMask) {
WRITE(p, " highp uint v32 = packUnorm4x8(%s);\n", compat.fragColor0);
WRITE(p, " highp uint d32 = packUnorm4x8(destColor);\n");
Expand All @@ -1047,6 +1066,22 @@ bool GenerateFragmentShader(const FShaderID &id, char *buffer, const ShaderLangu
WRITE(p, " %s = vec4(0.0, 0.0, 0.0, %s.z); // blue to alpha\n", compat.fragColor0, compat.fragColor0);
}

if (colorToDepth) {
DepthScaleFactors factors = GetDepthScaleFactors();

if (compat.bitwiseOps) {
WRITE(p, " highp float depthValue = float(int(%s.x * 31.99) | (int(%s.y * 63.99) << 5) | (int(%s.z * 31.99) << 11)) / 65535.0;\n", "v", "v", "v"); // compat.fragColor0, compat.fragColor0, compat.fragColor0);
} else {
// D3D9-compatible alternative
WRITE(p, " highp float depthValue = (floor(%s.x * 31.99) + floor(%s.y * 63.99) * 32.0 + floor(%s.z * 31.99) * 2048.0) / 65535.0;\n", "v", "v", "v"); // compat.fragColor0, compat.fragColor0, compat.fragColor0);
}
if (factors.scale != 1.0 || factors.offset != 0.0) {
WRITE(p, " gl_FragDepth = (depthValue / %f) + %f;\n", factors.scale / 65535.0f, factors.offset);
} else {
WRITE(p, " gl_FragDepth = depthValue;\n");
}
}

if (gstate_c.Supports(GPU_ROUND_FRAGMENT_DEPTH_TO_16BIT)) {
const double scale = DepthSliceFactor() * 65535.0;

Expand All @@ -1060,7 +1095,7 @@ bool GenerateFragmentShader(const FShaderID &id, char *buffer, const ShaderLangu
WRITE(p, " z = floor(z * %f) * (1.0 / %f);\n", scale, scale);
}
} else {
WRITE(p, " z = (1.0/65535.0) * floor(z * 65535.0);\n");
WRITE(p, " z = (1.0 / 65535.0) * floor(z * 65535.0);\n");
}
WRITE(p, " gl_FragDepth = z;\n");
} else if (useDiscardStencilBugWorkaround) {
Expand All @@ -1071,13 +1106,11 @@ bool GenerateFragmentShader(const FShaderID &id, char *buffer, const ShaderLangu
WRITE(p, " gl_FragDepth = gl_FragCoord.z;\n");
}

if (compat.shaderLanguage == HLSL_D3D11) {
if (compat.shaderLanguage == HLSL_D3D11 || compat.shaderLanguage == HLSL_D3D9) {
if (writeDepth) {
WRITE(p, " outfragment.depth = gl_FragDepth;\n");
}
WRITE(p, " return outfragment;\n");
} else if (compat.shaderLanguage == HLSL_D3D9) {
WRITE(p, " return target;\n");
}

WRITE(p, "}\n");
Expand Down
42 changes: 41 additions & 1 deletion GPU/Common/FramebufferManagerCommon.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -275,14 +275,22 @@ VirtualFramebuffer *FramebufferManagerCommon::DoSetRenderFrameBuffer(const Frame
gstate_c.SetCurRTOffset(0, 0);
bool vfbFormatChanged = false;

if (params.fb_address == params.z_address) {
// Most likely Z will not be used in this pass, as that would wreak havoc (undefined behavior for sure)
// We probably don't need to do anything about that, but let's log it.
WARN_LOG_ONCE(color_equal_z, G3D, "Framebuffer bound with color addr == z addr, likely will not use Z in this pass: %08x", params.fb_address);
}

FramebufferRenderMode mode = FB_MODE_NORMAL;

// Find a matching framebuffer
VirtualFramebuffer *vfb = nullptr;
for (size_t i = 0; i < vfbs_.size(); ++i) {
VirtualFramebuffer *v = vfbs_[i];

const u32 bpp = v->format == GE_FORMAT_8888 ? 4 : 2;

if (v->fb_address == params.fb_address) {
if (params.fb_address == v->fb_address) {
vfb = v;
// Update fb stride in case it changed
if (vfb->fb_stride != params.fb_stride) {
Expand Down Expand Up @@ -311,6 +319,19 @@ VirtualFramebuffer *FramebufferManagerCommon::DoSetRenderFrameBuffer(const Frame
vfb->height = drawing_height;
}
break;
} else if (params.fb_address == v->z_address && params.fmt != GE_FORMAT_8888 && params.fb_stride == v->z_stride) {
// Looks like the game might be intending to use color to write directly to a Z buffer.
// This is seen in Kurohyo 2.

// Ignore this in this loop, BUT, we do a lookup in the depth tracking afterwards to
// make sure we get the latest one.
WARN_LOG_ONCE(color_matches_z, G3D, "Color framebuffer bound at %08x with likely intent to write explicit Z values using color. fmt = %s", params.fb_address, GeBufferFormatToString(params.fmt));
// Seems impractical to use the other 16-bit formats for this due to the limited control over alpha,
// so we'll simply only support 565.
if (params.fmt == GE_FORMAT_565) {
mode = FB_MODE_COLOR_TO_DEPTH;
break;
}
} else if (v->fb_stride == params.fb_stride && v->format == params.fmt) {
u32 v_fb_first_line_end_ptr = v->fb_address + v->fb_stride * 4; // This should be * bpp, but leaving like this until after 1.13 to be safe. The God of War games use this for shadows.
u32 v_fb_end_ptr = v->fb_address + v->fb_stride * v->height * bpp;
Expand Down Expand Up @@ -345,6 +366,25 @@ VirtualFramebuffer *FramebufferManagerCommon::DoSetRenderFrameBuffer(const Frame
}
}

if (mode == FB_MODE_COLOR_TO_DEPTH) {
// Lookup in the depth tracking to find which VFB has the latest version of this Z buffer.
// Then bind it in color-to-depth mode.
//
// We are going to do this by having a special render mode where we take color and move to
// depth in the fragment shader, and set color writes to off.
//
// We'll need a special fragment shader flag to convert color to depth.

for (auto &depth : this->trackedDepthBuffers_) {
if (depth->z_address == params.fb_address && depth->z_stride == params.fb_stride) {
// Found the matching depth buffer. Use this vfb.
vfb = depth->vfb;
}
}
}

gstate_c.SetFramebufferRenderMode(mode);

if (vfb) {
if ((drawing_width != vfb->bufferWidth || drawing_height != vfb->bufferHeight)) {
// Even if it's not newly wrong, if this is larger we need to resize up.
Expand Down
2 changes: 1 addition & 1 deletion GPU/Common/FramebufferManagerCommon.h
Original file line number Diff line number Diff line change
Expand Up @@ -224,7 +224,7 @@ class FramebufferManagerCommon {
void SetDisplayFramebuffer(u32 framebuf, u32 stride, GEBufferFormat format);
void DestroyFramebuf(VirtualFramebuffer *v);

VirtualFramebuffer *DoSetRenderFrameBuffer(const FramebufferHeuristicParams &params, u32 skipDrawReason);
VirtualFramebuffer *DoSetRenderFrameBuffer(const FramebufferHeuristicParams &params, u32 skipDrawReason);
VirtualFramebuffer *SetRenderFrameBuffer(bool framebufChanged, int skipDrawReason) {
// Inlining this part since it's so frequent.
if (!framebufChanged && currentRenderVfb_) {
Expand Down
10 changes: 10 additions & 0 deletions GPU/Common/GPUStateUtils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1018,6 +1018,16 @@ void ConvertMaskState(GenericMaskState &maskState, bool allowFramebufferRead) {
return;
}

if (gstate_c.renderMode == FB_MODE_COLOR_TO_DEPTH) {
// Suppress color writes entirely in this mode.
maskState.applyFramebufferRead = false;
maskState.rgba[0] = false;
maskState.rgba[1] = false;
maskState.rgba[2] = false;
maskState.rgba[3] = false;
return;
}

// Invert to convert masks from the PSP's format where 1 is don't draw to PC where 1 is draw.
uint32_t colorMask = ~((gstate.pmskc & 0xFFFFFF) | (gstate.pmska << 24));

Expand Down
5 changes: 4 additions & 1 deletion GPU/Common/GPUStateUtils.h
Original file line number Diff line number Diff line change
Expand Up @@ -81,14 +81,17 @@ struct ViewportAndScissor {
void ConvertViewportAndScissor(bool useBufferedRendering, float renderWidth, float renderHeight, int bufferWidth, int bufferHeight, ViewportAndScissor &out);
float ToScaledDepthFromIntegerScale(float z);

// Use like this: (z - offset) * scale
// Offset/scale pair describing a linear transform of a z value.
// Apply() evaluates (z - offset) * scale, and ApplyInverse() evaluates
// (z / scale) + offset, i.e. the algebraic inverse of Apply().
struct DepthScaleFactors {
	float offset;
	float scale;

	// Forward transform: remove the offset first, then multiply by the scale.
	float Apply(float z) const {
		const float shifted = z - offset;
		return shifted * scale;
	}

	// Reverse transform: divide by the scale first, then add the offset back.
	float ApplyInverse(float z) const {
		const float unscaled = z / scale;
		return unscaled + offset;
	}
};
DepthScaleFactors GetDepthScaleFactors();

Expand Down
6 changes: 6 additions & 0 deletions GPU/Common/ShaderId.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
#include "Core/Config.h"

#include "GPU/ge_constants.h"
#include "GPU/GPU.h"
#include "GPU/GPUState.h"
#include "GPU/Common/GPUStateUtils.h"
#include "GPU/Common/ShaderId.h"
Expand Down Expand Up @@ -239,6 +240,8 @@ std::string FragmentShaderDesc(const FShaderID &id) {
if (id.Bit(FS_BIT_COLOR_AGAINST_ZERO)) desc << "ColorTest0 " << alphaTestFuncs[id.Bits(FS_BIT_COLOR_TEST_FUNC, 2)] << " "; // first 4 match;
else if (id.Bit(FS_BIT_COLOR_TEST)) desc << "ColorTest " << alphaTestFuncs[id.Bits(FS_BIT_COLOR_TEST_FUNC, 2)] << " "; // first 4 match

if (id.Bit(FS_BIT_COLOR_TO_DEPTH)) desc << "ColorToDepth ";

return desc.str();
}

Expand All @@ -261,6 +264,7 @@ void ComputeFragmentShaderID(FShaderID *id_out, const Draw::Bugs &bugs) {
bool doFlatShading = gstate.getShadeMode() == GE_SHADE_FLAT;
bool useShaderDepal = gstate_c.useShaderDepal;
bool colorWriteMask = IsColorWriteMaskComplex(gstate_c.allowFramebufferRead);
bool colorToDepth = gstate_c.renderMode == FramebufferRenderMode::FB_MODE_COLOR_TO_DEPTH;

// Note how we here recompute some of the work already done in state mapping.
// Not ideal! At least we share the code.
Expand Down Expand Up @@ -292,6 +296,8 @@ void ComputeFragmentShaderID(FShaderID *id_out, const Draw::Bugs &bugs) {
id.SetBit(FS_BIT_3D_TEXTURE, gstate_c.curTextureIs3D);
}

id.SetBit(FS_BIT_COLOR_TO_DEPTH, colorToDepth);

id.SetBit(FS_BIT_LMODE, lmode);
if (enableAlphaTest) {
// 5 bits total.
Expand Down
1 change: 1 addition & 0 deletions GPU/Common/ShaderId.h
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,7 @@ enum FShaderBit : uint8_t {
FS_BIT_NO_DEPTH_CANNOT_DISCARD_STENCIL = 49,
FS_BIT_COLOR_WRITEMASK = 50,
FS_BIT_3D_TEXTURE = 51,
FS_BIT_COLOR_TO_DEPTH = 52,
};

static inline FShaderBit operator +(FShaderBit bit, int i) {
Expand Down
12 changes: 11 additions & 1 deletion GPU/Common/TextureCacheCommon.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -261,6 +261,10 @@ SamplerCacheKey TextureCacheCommon::GetSamplingParams(int maxLevel, const TexCac
}
}

if (gstate_c.renderMode == FB_MODE_COLOR_TO_DEPTH) {
forceFiltering = TEX_FILTER_FORCE_NEAREST;
}

switch (forceFiltering) {
case TEX_FILTER_AUTO:
break;
Expand Down Expand Up @@ -2131,8 +2135,14 @@ bool TextureCacheCommon::PrepareBuildTexture(BuildTexturePlan &plan, TexCacheEnt
}

// Don't scale the PPGe texture.
if (entry->addr > 0x05000000 && entry->addr < PSP_GetKernelMemoryEnd())
if (entry->addr > 0x05000000 && entry->addr < PSP_GetKernelMemoryEnd()) {
plan.scaleFactor = 1;
}

// Don't upscale textures in color-to-depth mode.
if (gstate_c.renderMode == FB_MODE_COLOR_TO_DEPTH) {
plan.scaleFactor = 1;
}

if ((entry->status & TexCacheEntry::STATUS_CHANGE_FREQUENT) != 0 && plan.scaleFactor != 1 && plan.slowScaler) {
// Remember for later that we /wanted/ to scale this texture.
Expand Down
9 changes: 8 additions & 1 deletion GPU/D3D11/StateMappingD3D11.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -283,7 +283,14 @@ void DrawEngineD3D11::ApplyDrawState(int prim) {
GenericStencilFuncState stencilState;
ConvertStencilFuncState(stencilState);

if (gstate.isModeClear()) {
if (gstate_c.renderMode == FB_MODE_COLOR_TO_DEPTH) {
// Enforce plain depth writing.
keys_.depthStencil.value = 0;
keys_.depthStencil.depthTestEnable = true;
keys_.depthStencil.depthWriteEnable = true;
keys_.depthStencil.stencilTestEnable = false;
keys_.depthStencil.depthCompareOp = D3D11_COMPARISON_ALWAYS;
} else if (gstate.isModeClear()) {
keys_.depthStencil.value = 0;
keys_.depthStencil.depthTestEnable = true;
keys_.depthStencil.depthCompareOp = D3D11_COMPARISON_ALWAYS;
Expand Down
9 changes: 8 additions & 1 deletion GPU/Directx9/StateMappingDX9.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -197,7 +197,14 @@ void DrawEngineDX9::ApplyDrawState(int prim) {
ConvertStencilFuncState(stencilState);

// Set Stencil/Depth
if (gstate.isModeClear()) {

if (gstate_c.renderMode == FB_MODE_COLOR_TO_DEPTH) {
// Enforce plain depth writing.
dxstate.depthTest.enable();
dxstate.depthFunc.set(D3DCMP_ALWAYS);
dxstate.depthWrite.set(true);
dxstate.stencilTest.disable();
} else if (gstate.isModeClear()) {
// Depth Test
dxstate.depthTest.enable();
dxstate.depthFunc.set(D3DCMP_ALWAYS);
Expand Down
6 changes: 5 additions & 1 deletion GPU/GLES/StateMappingGLES.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -250,7 +250,11 @@ void DrawEngineGLES::ApplyDrawState(int prim) {
GenericStencilFuncState stencilState;
ConvertStencilFuncState(stencilState);

if (gstate.isModeClear()) {
if (gstate_c.renderMode == FB_MODE_COLOR_TO_DEPTH) {
// Enforce plain depth writing.
renderManager->SetStencilDisabled();
renderManager->SetDepth(true, true, GL_ALWAYS);
} else if (gstate.isModeClear()) {
// Depth Test
if (gstate.isClearModeDepthMask()) {
framebufferManager_->SetDepthUpdated();
Expand Down
5 changes: 5 additions & 0 deletions GPU/GPU.h
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,11 @@ class GPUInterface;
class GPUDebugInterface;
class GraphicsContext;

// How draws to the currently bound framebuffer are to be treated.
enum FramebufferRenderMode {
// Regular rendering; the default chosen when a render framebuffer is set up.
FB_MODE_NORMAL = 0,
// Chosen when a 565 color target is bound at the address/stride of an existing
// framebuffer's depth buffer. In this mode the fragment shader converts the
// color value into a depth value, and color writes are masked off entirely.
FB_MODE_COLOR_TO_DEPTH = 1,
};

enum SkipDrawReasonFlags {
SKIPDRAW_SKIPFRAME = 1,
SKIPDRAW_NON_DISPLAYED_FB = 2, // Skip drawing to FBO:s that have not been displayed.
Expand Down
Loading