Permalink
Browse files

D3D9: Implement vertex range culling.

  • Loading branch information...
unknownbrackets committed Sep 17, 2018
1 parent ab3a466 commit 639a3f406d3a4b04c6b8119c30a7a3fd2b15c66e
@@ -314,7 +314,7 @@ void ShaderManagerDX9::PSUpdateUniforms(u64 dirtyUniforms) {
} }
const uint64_t vsUniforms = DIRTY_PROJMATRIX | DIRTY_PROJTHROUGHMATRIX | DIRTY_WORLDMATRIX | DIRTY_VIEWMATRIX | DIRTY_TEXMATRIX | const uint64_t vsUniforms = DIRTY_PROJMATRIX | DIRTY_PROJTHROUGHMATRIX | DIRTY_WORLDMATRIX | DIRTY_VIEWMATRIX | DIRTY_TEXMATRIX |
DIRTY_FOGCOEF | DIRTY_BONE_UNIFORMS | DIRTY_UVSCALEOFFSET | DIRTY_DEPTHRANGE | DIRTY_FOGCOEF | DIRTY_BONE_UNIFORMS | DIRTY_UVSCALEOFFSET | DIRTY_DEPTHRANGE | DIRTY_CULLRANGE |
DIRTY_AMBIENT | DIRTY_MATAMBIENTALPHA | DIRTY_MATSPECULAR | DIRTY_MATDIFFUSE | DIRTY_MATEMISSIVE | DIRTY_LIGHT0 | DIRTY_LIGHT1 | DIRTY_LIGHT2 | DIRTY_LIGHT3; DIRTY_AMBIENT | DIRTY_MATAMBIENTALPHA | DIRTY_MATSPECULAR | DIRTY_MATDIFFUSE | DIRTY_MATEMISSIVE | DIRTY_LIGHT0 | DIRTY_LIGHT1 | DIRTY_LIGHT2 | DIRTY_LIGHT3;
void ShaderManagerDX9::VSUpdateUniforms(u64 dirtyUniforms) { void ShaderManagerDX9::VSUpdateUniforms(u64 dirtyUniforms) {
@@ -425,7 +425,7 @@ void ShaderManagerDX9::VSUpdateUniforms(u64 dirtyUniforms) {
VSSetFloatArray(CONST_VS_UVSCALEOFFSET, uvscaleoff, 4); VSSetFloatArray(CONST_VS_UVSCALEOFFSET, uvscaleoff, 4);
} }
if (dirtyUniforms & DIRTY_DEPTHRANGE) { if (dirtyUniforms & DIRTY_DEPTHRANGE) {
// Depth is [0, 1] mapping to [minz, maxz], not too hard. // Depth is [0, 1] mapping to [minz, maxz], not too hard.
float vpZScale = gstate.getViewportZScale(); float vpZScale = gstate.getViewportZScale();
float vpZCenter = gstate.getViewportZCenter(); float vpZCenter = gstate.getViewportZCenter();
@@ -447,6 +447,45 @@ void ShaderManagerDX9::VSUpdateUniforms(u64 dirtyUniforms) {
float data[4] = { viewZScale, viewZCenter, viewZCenter, viewZInvScale }; float data[4] = { viewZScale, viewZCenter, viewZCenter, viewZInvScale };
VSSetFloatUniform4(CONST_VS_DEPTHRANGE, data); VSSetFloatUniform4(CONST_VS_DEPTHRANGE, data);
} }
// Recompute and upload the vertex range culling bounds whenever DIRTY_CULLRANGE is set.
// Two float4 constants are written: CONST_VS_CULLRANGEMIN and CONST_VS_CULLRANGEMAX.
// xyz hold the sorted lower/upper bounds per axis; the w components carry extra flags (see below).
if (dirtyUniforms & DIRTY_CULLRANGE) {
// Account for the projection viewport adjustment when viewport is too large.
// Each reverseViewport* helper undoes the PSP viewport transform (subtract center, divide by scale)
// and then undoes the gstate_c offset/scale adjustment for that axis.
auto reverseViewportX = [](float x) {
float pspViewport = (x - gstate.getViewportXCenter()) * (1.0f / gstate.getViewportXScale());
return (pspViewport - gstate_c.vpXOffset) * (1.0f / gstate_c.vpWidthScale);
};
auto reverseViewportY = [](float y) {
float yOffset = gstate_c.vpYOffset;
// The Y offset sign is flipped only when rendering in non-buffered mode.
if (g_Config.iRenderingMode == FB_NON_BUFFERED_MODE) {
// GL upside down is a pain as usual.
// TODO: Is this right?
// NOTE(review): this is the DX9 shader manager, yet the comment above talks about GL -
// presumably carried over from the GLES backend; confirm the flip is wanted here.
yOffset = -yOffset;
}
float pspViewport = (y - gstate.getViewportYCenter()) * (1.0f / gstate.getViewportYScale());
return (pspViewport - yOffset) * (1.0f / gstate_c.vpHeightScale);
};
auto reverseViewportZ = [](float z) {
float pspViewport = (z - gstate.getViewportZCenter()) * (1.0f / gstate.getViewportZScale());
// Differs from GLES: depth is 0 to 1, not -1 to 1.
// The trailing "* 0.5f + 0.5f" remaps the result accordingly.
return (pspViewport - gstate_c.vpZOffset) * (1.0f / gstate_c.vpDepthScale) * 0.5f + 0.5f;
};
// Returns the two inputs ordered as (smaller, larger); the reversed endpoints may come out
// in either order, so they are sorted before being used as min/max.
auto sortPair = [](float a, float b) {
return a > b ? std::make_pair(b, a) : std::make_pair(a, b);
};
// The PSP seems to use 0.12.4 for X and Y, and 0.16.0 for Z.
// Any vertex outside this range (unless depth clamp enabled) is discarded.
// NOTE(review): the Z upper endpoint is 65535.5f rather than 65535.0f - presumably to allow
// for rounding; confirm.
auto x = sortPair(reverseViewportX(0.0f), reverseViewportX(4096.0f));
auto y = sortPair(reverseViewportY(0.0f), reverseViewportY(4096.0f));
auto z = sortPair(reverseViewportZ(0.0f), reverseViewportZ(65535.5f));
// Since we have space in w, use it to pass the depth clamp flag. We also pass NAN for w "discard".
// min.w: 1.0 when depth clamp is enabled, 0.0 otherwise.
float clampEnable = gstate.isDepthClampEnabled() ? 1.0f : 0.0f;
float minValues[4]{ x.first, y.first, z.first, clampEnable };
VSSetFloatUniform4(CONST_VS_CULLRANGEMIN, minValues);
// max.w: NAN, which the generated vertex shader copies into outPos.w for culled vertices.
float maxValues[4]{ x.second, y.second, z.second, NAN };
VSSetFloatUniform4(CONST_VS_CULLRANGEMAX, maxValues);
}
// Lighting // Lighting
if (dirtyUniforms & DIRTY_AMBIENT) { if (dirtyUniforms & DIRTY_AMBIENT) {
VSSetColorUniform3Alpha(CONST_VS_AMBIENT, gstate.ambientcolor, gstate.getAmbientA()); VSSetColorUniform3Alpha(CONST_VS_AMBIENT, gstate.ambientcolor, gstate.getAmbientA());
@@ -176,6 +176,10 @@ void GenerateVertexShaderHLSL(const VShaderID &id, char *buffer, ShaderLanguage
if (!isModeThrough && gstate_c.Supports(GPU_ROUND_DEPTH_TO_16BIT)) { if (!isModeThrough && gstate_c.Supports(GPU_ROUND_DEPTH_TO_16BIT)) {
WRITE(p, "float4 u_depthRange : register(c%i);\n", CONST_VS_DEPTHRANGE); WRITE(p, "float4 u_depthRange : register(c%i);\n", CONST_VS_DEPTHRANGE);
} }
if (!isModeThrough) {
WRITE(p, "float4 u_cullRangeMin : register(c%i);\n", CONST_VS_CULLRANGEMIN);
WRITE(p, "float4 u_cullRangeMax : register(c%i);\n", CONST_VS_CULLRANGEMAX);
}
} else { } else {
WRITE(p, "cbuffer base : register(b0) {\n%s};\n", cb_baseStr); WRITE(p, "cbuffer base : register(b0) {\n%s};\n", cb_baseStr);
WRITE(p, "cbuffer lights: register(b1) {\n%s};\n", cb_vs_lightsStr); WRITE(p, "cbuffer lights: register(b1) {\n%s};\n", cb_vs_lightsStr);
@@ -370,22 +374,22 @@ void GenerateVertexShaderHLSL(const VShaderID &id, char *buffer, ShaderLanguage
} }
if (lang == HLSL_D3D11 || lang == HLSL_D3D11_LEVEL9) { if (lang == HLSL_D3D11 || lang == HLSL_D3D11_LEVEL9) {
if (isModeThrough) { if (isModeThrough) {
WRITE(p, " Out.gl_Position = mul(u_proj_through, float4(In.position.xyz, 1.0));\n"); WRITE(p, " float4 outPos = mul(u_proj_through, float4(In.position.xyz, 1.0));\n");
} else { } else {
if (gstate_c.Supports(GPU_ROUND_DEPTH_TO_16BIT)) { if (gstate_c.Supports(GPU_ROUND_DEPTH_TO_16BIT)) {
WRITE(p, " Out.gl_Position = depthRoundZVP(mul(u_proj, float4(In.position.xyz, 1.0)));\n"); WRITE(p, " float4 outPos = depthRoundZVP(mul(u_proj, float4(In.position.xyz, 1.0)));\n");
} else { } else {
WRITE(p, " Out.gl_Position = mul(u_proj, float4(In.position.xyz, 1.0));\n"); WRITE(p, " float4 outPos = mul(u_proj, float4(In.position.xyz, 1.0));\n");
} }
} }
} else { } else {
if (isModeThrough) { if (isModeThrough) {
WRITE(p, " Out.gl_Position = mul(float4(In.position.xyz, 1.0), u_proj_through);\n"); WRITE(p, " float4 outPos = mul(float4(In.position.xyz, 1.0), u_proj_through);\n");
} else { } else {
if (gstate_c.Supports(GPU_ROUND_DEPTH_TO_16BIT)) { if (gstate_c.Supports(GPU_ROUND_DEPTH_TO_16BIT)) {
WRITE(p, " Out.gl_Position = depthRoundZVP(mul(float4(In.position.xyz, 1.0), u_proj));\n"); WRITE(p, " float4 outPos = depthRoundZVP(mul(float4(In.position.xyz, 1.0), u_proj));\n");
} else { } else {
WRITE(p, " Out.gl_Position = mul(float4(In.position.xyz, 1.0), u_proj);\n"); WRITE(p, " float4 outPos = mul(float4(In.position.xyz, 1.0), u_proj);\n");
} }
} }
} }
@@ -577,16 +581,16 @@ void GenerateVertexShaderHLSL(const VShaderID &id, char *buffer, ShaderLanguage
if (lang == HLSL_D3D11 || lang == HLSL_D3D11_LEVEL9) { if (lang == HLSL_D3D11 || lang == HLSL_D3D11_LEVEL9) {
// Final view and projection transforms. // Final view and projection transforms.
if (gstate_c.Supports(GPU_ROUND_DEPTH_TO_16BIT)) { if (gstate_c.Supports(GPU_ROUND_DEPTH_TO_16BIT)) {
WRITE(p, " Out.gl_Position = depthRoundZVP(mul(u_proj, viewPos));\n"); WRITE(p, " float4 outPos = depthRoundZVP(mul(u_proj, viewPos));\n");
} else { } else {
WRITE(p, " Out.gl_Position = mul(u_proj, viewPos);\n"); WRITE(p, " float4 outPos = mul(u_proj, viewPos);\n");
} }
} else { } else {
// Final view and projection transforms. // Final view and projection transforms.
if (gstate_c.Supports(GPU_ROUND_DEPTH_TO_16BIT)) { if (gstate_c.Supports(GPU_ROUND_DEPTH_TO_16BIT)) {
WRITE(p, " Out.gl_Position = depthRoundZVP(mul(viewPos, u_proj));\n"); WRITE(p, " float4 outPos = depthRoundZVP(mul(viewPos, u_proj));\n");
} else { } else {
WRITE(p, " Out.gl_Position = mul(viewPos, u_proj);\n"); WRITE(p, " float4 outPos = mul(viewPos, u_proj);\n");
} }
} }
@@ -811,6 +815,19 @@ void GenerateVertexShaderHLSL(const VShaderID &id, char *buffer, ShaderLanguage
} }
} }
// Emit the vertex range culling test. Only generated for the D3D9 HLSL dialect and only
// outside through mode, matching where u_cullRangeMin/u_cullRangeMax are declared.
if (lang == HLSL_DX9 && !isModeThrough) {
// Perspective divide so the position can be compared against the cull range bounds.
// NOTE(review): no guard against outPos.w == 0 is emitted here - confirm that is acceptable.
WRITE(p, " float3 projPos = outPos.xyz / outPos.w;\n");
// Vertex range culling doesn't happen when depth is clamped, so only do this if in range.
// The emitted condition runs the range test when u_cullRangeMin.w (the depth clamp flag) is
// not positive, or when projPos.z already lies within [u_cullRangeMin.z, u_cullRangeMax.z].
WRITE(p, " if (u_cullRangeMin.w <= 0.0f || (projPos.z >= u_cullRangeMin.z && projPos.z <= u_cullRangeMax.z)) {\n");
// HLSL sub-expressions: true when any component is below the minimum / above the maximum.
const char *outMin = "projPos.x < u_cullRangeMin.x || projPos.y < u_cullRangeMin.y || projPos.z < u_cullRangeMin.z";
const char *outMax = "projPos.x > u_cullRangeMax.x || projPos.y > u_cullRangeMax.y || projPos.z > u_cullRangeMax.z";
WRITE(p, " if (%s || %s) {\n", outMin, outMax);
// Out-of-range vertex: overwrite w with u_cullRangeMax.w (the "discard" value set by the uniform upload).
WRITE(p, " outPos.w = u_cullRangeMax.w;\n");
WRITE(p, " }\n");
WRITE(p, " }\n");
}
WRITE(p, " Out.gl_Position = outPos;\n");
WRITE(p, " return Out;\n"); WRITE(p, " return Out;\n");
WRITE(p, "}\n"); WRITE(p, "}\n");
} }
@@ -53,6 +53,8 @@ namespace DX9 {
CONST_VS_BONE6 = 71, CONST_VS_BONE6 = 71,
CONST_VS_BONE7 = 74, CONST_VS_BONE7 = 74,
CONST_VS_BONE8 = 77, CONST_VS_BONE8 = 77,
CONST_VS_CULLRANGEMIN = 80,
CONST_VS_CULLRANGEMAX = 81,
}; };
}; };
@@ -457,7 +457,7 @@ void LinkedShader::UpdateUniforms(u32 vertType, const ShaderID &vsid) {
if (dirty & DIRTY_TEXMATRIX) { if (dirty & DIRTY_TEXMATRIX) {
SetMatrix4x3(render_, &u_texmtx, gstate.tgenMatrix); SetMatrix4x3(render_, &u_texmtx, gstate.tgenMatrix);
} }
if ((dirty & DIRTY_DEPTHRANGE) && u_depthRange != -1) { if (dirty & DIRTY_DEPTHRANGE) {
// Since depth is [-1, 1] mapping to [minz, maxz], this is easyish. // Since depth is [-1, 1] mapping to [minz, maxz], this is easyish.
float vpZScale = gstate.getViewportZScale(); float vpZScale = gstate.getViewportZScale();
float vpZCenter = gstate.getViewportZCenter(); float vpZCenter = gstate.getViewportZCenter();
@@ -87,6 +87,10 @@ enum DoLightComputation {
// //
// Now, the regular machinery will take over and do the calculation again. // Now, the regular machinery will take over and do the calculation again.
// //
// Depth is not clipped to the viewport, but does clip to "minz" and "maxz". It may also be clamped
// to 0 and 65535 if a depth clamping/clipping flag is set (x/y clipping is performed only if depth
// needs to be clamped).
//
// All this above is for full transform mode. // All this above is for full transform mode.
// In through mode, the Z coordinate just goes straight through and there is no perspective division. // In through mode, the Z coordinate just goes straight through and there is no perspective division.
// We simulate this of course with pretty much an identity matrix. Rounding Z becomes very easy. // We simulate this of course with pretty much an identity matrix. Rounding Z becomes very easy.

0 comments on commit 639a3f4

Please sign in to comment.