Skip to content

Commit

Permalink
Optimize value conversion in (unused) depth readback.
Browse files Browse the repository at this point in the history
This also exposes factors we can feed into shaders to do the conversion
on the GPU.
  • Loading branch information
hrydgard committed Aug 9, 2020
1 parent 8f013ef commit e6dfb55
Show file tree
Hide file tree
Showing 5 changed files with 42 additions and 9 deletions.
12 changes: 9 additions & 3 deletions GPU/Common/GPUStateUtils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -531,14 +531,20 @@ float ToScaledDepthFromIntegerScale(float z) {
}
}

float FromScaledDepth(float z) {
// See struct DepthScaleFactors for how to apply.
DepthScaleFactors GetDepthScaleFactors() {
DepthScaleFactors factors;
if (!gstate_c.Supports(GPU_SUPPORTS_ACCURATE_DEPTH)) {
return z * 65535.0f;
factors.offset = 0;
factors.scale = 65535.0f;
return factors;
}

const float depthSliceFactor = DepthSliceFactor();
const float offset = 0.5f * (depthSliceFactor - 1.0f) * (1.0f / depthSliceFactor);
return (z - offset) * depthSliceFactor * 65535.0f;
factors.scale = depthSliceFactor * 65535.0f;
factors.offset = offset;
return factors;
}

void ConvertViewportAndScissor(bool useBufferedRendering, float renderWidth, float renderHeight, int bufferWidth, int bufferHeight, ViewportAndScissor &out) {
Expand Down
13 changes: 12 additions & 1 deletion GPU/Common/GPUStateUtils.h
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,18 @@ struct ViewportAndScissor {
};
void ConvertViewportAndScissor(bool useBufferedRendering, float renderWidth, float renderHeight, int bufferWidth, int bufferHeight, ViewportAndScissor &out);
float ToScaledDepthFromIntegerScale(float z);
float FromScaledDepth(float z);

// Pair of factors describing a linear depth conversion.
// Use like this: (z - offset) * scale
struct DepthScaleFactors {
	// Defaults form the identity mapping (Apply(z) == z), so a default-constructed
	// instance never exposes indeterminate floats before the fields are assigned.
	float offset = 0.0f;
	float scale = 1.0f;

	// Returns z with the offset subtracted first and the scale applied second.
	float Apply(float z) const {
		return (z - offset) * scale;
	}
};
DepthScaleFactors GetDepthScaleFactors();

float DepthSliceFactor();

// These are common to all modern APIs and can be easily converted with a lookup table.
Expand Down
3 changes: 2 additions & 1 deletion GPU/Directx9/FramebufferManagerDX9.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -538,12 +538,13 @@ static const D3DVERTEXELEMENT9 g_FramebufferVertexElements[] = {
const u32 *packed = (const u32 *)locked.pBits;
u16 *depth = (u16 *)Memory::GetPointer(z_address);

DepthScaleFactors depthScale = GetDepthScaleFactors();
// TODO: Optimize.
for (int yp = 0; yp < h; ++yp) {
for (int xp = 0; xp < w; ++xp) {
const int offset = (yp + y) * vfb->z_stride + x + xp;

float scaled = FromScaledDepth((packed[offset] & 0x00FFFFFF) * (1.0f / 16777215.0f));
float scaled = depthScale.Apply((packed[offset] & 0x00FFFFFF) * (1.0f / 16777215.0f));
if (scaled <= 0.0f) {
depth[offset] = 0;
} else if (scaled >= 65535.0f) {
Expand Down
5 changes: 4 additions & 1 deletion GPU/GLES/DepthBufferGLES.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -169,6 +169,7 @@ void FramebufferManagerGLES::PackDepthbuffer(VirtualFramebuffer *vfb, int x, int

int totalPixels = h == 1 ? packWidth : vfb->z_stride * h;
if (format16Bit) {
// TODO: We have to apply GetDepthScaleFactors here too, right?

This comment has been minimized.

Copy link
@unknownbrackets

unknownbrackets Aug 10, 2020

Collaborator

No, see u_depthDownloadFactor. This path means we went through the shader.

We don't actually use any shader at all in the other path, which is why the factor is applied on the CPU.

-[Unknown]

This comment has been minimized.

Copy link
@unknownbrackets

unknownbrackets Aug 10, 2020

Collaborator

Note: I think we'll have more accurate results and an easier time passing pspautotests about it with the float download and CPU path, but I'm not sure.

-[Unknown]

for (int yp = 0; yp < h; ++yp) {
int row_offset = vfb->z_stride * yp;
for (int xp = 0; xp < packWidth; ++xp) {
Expand All @@ -177,11 +178,13 @@ void FramebufferManagerGLES::PackDepthbuffer(VirtualFramebuffer *vfb, int x, int
}
}
} else {
// TODO: Apply this in the shader.
DepthScaleFactors depthScale = GetDepthScaleFactors();
for (int yp = 0; yp < h; ++yp) {
int row_offset = vfb->z_stride * yp;
for (int xp = 0; xp < packWidth; ++xp) {
const int i = row_offset + xp;
float scaled = FromScaledDepth(packedf[i]);
float scaled = depthScale.Apply(packedf[i]);
if (scaled <= 0.0f) {
depth[i] = 0;
} else if (scaled >= 65535.0f) {
Expand Down
18 changes: 15 additions & 3 deletions Windows/GEDebugger/GEDebugger.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -546,9 +546,12 @@ void CGEDebugger::DescribePixel(u32 pix, GPUDebugBufferFormat fmt, int x, int y,
break;

case GPU_DBG_FORMAT_24BIT_8X:
{
DepthScaleFactors depthScale = GetDepthScaleFactors();
// These are only ever going to be depth values, so let's also show scaled to 16 bit.
_snwprintf(desc, 256, L"%d,%d: %d / %f / %f", x, y, pix & 0x00FFFFFF, (pix & 0x00FFFFFF) * (1.0f / 16777215.0f), FromScaledDepth((pix & 0x00FFFFFF) * (1.0f / 16777215.0f)));
_snwprintf(desc, 256, L"%d,%d: %d / %f / %f", x, y, pix & 0x00FFFFFF, (pix & 0x00FFFFFF) * (1.0f / 16777215.0f), depthScale.Apply((pix & 0x00FFFFFF) * (1.0f / 16777215.0f)));
break;
}

case GPU_DBG_FORMAT_24BIT_8X_DIV_256:
{
Expand All @@ -563,15 +566,24 @@ void CGEDebugger::DescribePixel(u32 pix, GPUDebugBufferFormat fmt, int x, int y,
_snwprintf(desc, 256, L"%d,%d: %d / %f", x, y, (pix >> 24) & 0xFF, ((pix >> 24) & 0xFF) * (1.0f / 255.0f));
break;

case GPU_DBG_FORMAT_FLOAT:
_snwprintf(desc, 256, L"%d,%d: %f / %f", x, y, *(float *)&pix, FromScaledDepth(*(float *)&pix));
case GPU_DBG_FORMAT_FLOAT: {
float pixf = *(float *)&pix;
DepthScaleFactors depthScale = GetDepthScaleFactors();
_snwprintf(desc, 256, L"%d,%d: %f / %f", x, y, pixf, depthScale.Apply(pixf));
break;
}

case GPU_DBG_FORMAT_FLOAT_DIV_256:
{
double z = *(float *)&pix;
int z24 = (int)(z * 16777215.0);

DepthScaleFactors factors = GetDepthScaleFactors();
// TODO: Use GetDepthScaleFactors here too, verify it's the same.
int z16 = z24 - 0x800000 + 0x8000;

int z16_2 = factors.Apply(z);

_snwprintf(desc, 256, L"%d,%d: %d / %f", x, y, z16, (z - 0.5 + (1.0 / 512.0)) * 256.0);
}
break;
Expand Down

0 comments on commit e6dfb55

Please sign in to comment.