Optimize value conversion in (unused) depth readback.

This also exposes factors we can feed into shaders to do the conversion on the GPU.
hrydgard · Aug 9, 2020 · e6dfb55 · unknownbrackets · Aug 10, 2020 · unknownbrackets
1 parent 8f013ef
commit e6dfb55
Show file tree

Hide file tree

Showing 5 changed files with 42 additions and 9 deletions.
diff --git a/GPU/Common/GPUStateUtils.cpp b/GPU/Common/GPUStateUtils.cpp
@@ -531,14 +531,20 @@ float ToScaledDepthFromIntegerScale(float z) {
 	}
 }
 
-float FromScaledDepth(float z) {
+// Returns the offset/scale pair used to convert a depth buffer value to the
+// 65535-based depth scale, so callers can fetch it once and apply it per value.
+// See struct DepthScaleFactors for how to apply.
+DepthScaleFactors GetDepthScaleFactors() {
+	DepthScaleFactors factors;
 	if (!gstate_c.Supports(GPU_SUPPORTS_ACCURATE_DEPTH)) {
-		return z * 65535.0f;
+		// Without accurate depth support this is a plain multiply by 65535, no offset.
+		factors.offset = 0;
+		factors.scale = 65535.0f;
+		return factors;
 	}
 
+	// Otherwise the offset is derived from the depth slice factor, and the same
+	// factor is folded into the scale: (z - offset) * depthSliceFactor * 65535.
 	const float depthSliceFactor = DepthSliceFactor();
 	const float offset = 0.5f * (depthSliceFactor - 1.0f) * (1.0f / depthSliceFactor);
-	return (z - offset) * depthSliceFactor * 65535.0f;
+	factors.scale = depthSliceFactor * 65535.0f;
+	factors.offset = offset;
+	return factors;
 }
 
 void ConvertViewportAndScissor(bool useBufferedRendering, float renderWidth, float renderHeight, int bufferWidth, int bufferHeight, ViewportAndScissor &out) {

diff --git a/GPU/Common/GPUStateUtils.h b/GPU/Common/GPUStateUtils.h
@@ -72,7 +72,18 @@ struct ViewportAndScissor {
 };
 void ConvertViewportAndScissor(bool useBufferedRendering, float renderWidth, float renderHeight, int bufferWidth, int bufferHeight, ViewportAndScissor &out);
 float ToScaledDepthFromIntegerScale(float z);
-float FromScaledDepth(float z);
+
+// Offset and scale for converting a depth value z, as returned by GetDepthScaleFactors().
+// The members have no default initializers; GetDepthScaleFactors() sets both.
+// Use like this: (z - offset) * scale
+struct DepthScaleFactors {
+	// Subtracted from z before scaling.
+	float offset;
+	// Multiplied in after the offset has been subtracted.
+	float scale;
+
+	// Applies the formula above to a single value: returns (z - offset) * scale.
+	float Apply(float z) const {
+		return (z - offset) * scale;
+	}
+};
+DepthScaleFactors GetDepthScaleFactors();
+
 float DepthSliceFactor();
 
 // These are common to all modern APIs and can be easily converted with a lookup table.

diff --git a/GPU/Directx9/FramebufferManagerDX9.cpp b/GPU/Directx9/FramebufferManagerDX9.cpp
@@ -538,12 +538,13 @@ static const D3DVERTEXELEMENT9 g_FramebufferVertexElements[] = {
 				const u32 *packed = (const u32 *)locked.pBits;
 				u16 *depth = (u16 *)Memory::GetPointer(z_address);
 
+				DepthScaleFactors depthScale = GetDepthScaleFactors();
 				// TODO: Optimize.
 				for (int yp = 0; yp < h; ++yp) {
 					for (int xp = 0; xp < w; ++xp) {
 						const int offset = (yp + y) * vfb->z_stride + x + xp;
 
-						float scaled = FromScaledDepth((packed[offset] & 0x00FFFFFF) * (1.0f / 16777215.0f));
+						float scaled = depthScale.Apply((packed[offset] & 0x00FFFFFF) * (1.0f / 16777215.0f));
 						if (scaled <= 0.0f) {
 							depth[offset] = 0;
 						} else if (scaled >= 65535.0f) {

diff --git a/GPU/GLES/DepthBufferGLES.cpp b/GPU/GLES/DepthBufferGLES.cpp
@@ -169,6 +169,7 @@ void FramebufferManagerGLES::PackDepthbuffer(VirtualFramebuffer *vfb, int x, int
 
 	int totalPixels = h == 1 ? packWidth : vfb->z_stride * h;
 	if (format16Bit) {
+		// TODO: We have to apply GetDepthScaleFactors here too, right?
 		for (int yp = 0; yp < h; ++yp) {
 			int row_offset = vfb->z_stride * yp;
 			for (int xp = 0; xp < packWidth; ++xp) {
@@ -177,11 +178,13 @@ void FramebufferManagerGLES::PackDepthbuffer(VirtualFramebuffer *vfb, int x, int
 			}
 		}
 	} else {
+		// TODO: Apply this in the shader.
+		DepthScaleFactors depthScale = GetDepthScaleFactors();
 		for (int yp = 0; yp < h; ++yp) {
 			int row_offset = vfb->z_stride * yp;
 			for (int xp = 0; xp < packWidth; ++xp) {
 				const int i = row_offset + xp;
-				float scaled = FromScaledDepth(packedf[i]);
+				float scaled = depthScale.Apply(packedf[i]);
 				if (scaled <= 0.0f) {
 					depth[i] = 0;
 				} else if (scaled >= 65535.0f) {

diff --git a/Windows/GEDebugger/GEDebugger.cpp b/Windows/GEDebugger/GEDebugger.cpp
@@ -546,9 +546,12 @@ void CGEDebugger::DescribePixel(u32 pix, GPUDebugBufferFormat fmt, int x, int y,
 		break;
 
 	case GPU_DBG_FORMAT_24BIT_8X:
+	{
+		DepthScaleFactors depthScale = GetDepthScaleFactors();
 		// These are only ever going to be depth values, so let's also show scaled to 16 bit.
-		_snwprintf(desc, 256, L"%d,%d: %d / %f / %f", x, y, pix & 0x00FFFFFF, (pix & 0x00FFFFFF) * (1.0f / 16777215.0f), FromScaledDepth((pix & 0x00FFFFFF) * (1.0f / 16777215.0f)));
+		_snwprintf(desc, 256, L"%d,%d: %d / %f / %f", x, y, pix & 0x00FFFFFF, (pix & 0x00FFFFFF) * (1.0f / 16777215.0f), depthScale.Apply((pix & 0x00FFFFFF) * (1.0f / 16777215.0f)));
 		break;
+	}
 
 	case GPU_DBG_FORMAT_24BIT_8X_DIV_256:
 		{
@@ -563,15 +566,24 @@ void CGEDebugger::DescribePixel(u32 pix, GPUDebugBufferFormat fmt, int x, int y,
 		_snwprintf(desc, 256, L"%d,%d: %d / %f", x, y, (pix >> 24) & 0xFF, ((pix >> 24) & 0xFF) * (1.0f / 255.0f));
 		break;
 
-	case GPU_DBG_FORMAT_FLOAT:
-		_snwprintf(desc, 256, L"%d,%d: %f / %f", x, y, *(float *)&pix, FromScaledDepth(*(float *)&pix));
+	case GPU_DBG_FORMAT_FLOAT: {
+		float pixf = *(float *)&pix;
+		DepthScaleFactors depthScale = GetDepthScaleFactors();
+		_snwprintf(desc, 256, L"%d,%d: %f / %f", x, y, pixf, depthScale.Apply(pixf));
 		break;
+	}
 
 	case GPU_DBG_FORMAT_FLOAT_DIV_256:
 		{
 			double z = *(float *)&pix;
 			int z24 = (int)(z * 16777215.0);
+
+			DepthScaleFactors factors = GetDepthScaleFactors();
+			// TODO: Use GetDepthScaleFactors here too, verify it's the same.
 			int z16 = z24 - 0x800000 + 0x8000;
+
+			int z16_2 = factors.Apply(z);
+
 			_snwprintf(desc, 256, L"%d,%d: %d / %f", x, y, z16, (z - 0.5 + (1.0 / 512.0)) * 256.0);
 		}
 		break;