Permalink
Browse files

GPU: Refactor cull range calculation together.

  • Loading branch information...
unknownbrackets committed Sep 18, 2018
1 parent 9859827 commit 52baec21a85f1f74ce6cc7e0019dd315779463ff
Showing with 53 additions and 102 deletions.
  1. +44 −35 GPU/Common/ShaderUniforms.cpp
  2. +2 −0 GPU/Common/ShaderUniforms.h
  3. +3 −34 GPU/Directx9/ShaderManagerDX9.cpp
  4. +4 −33 GPU/GLES/ShaderManagerGLES.cpp
@@ -26,6 +26,49 @@ static void ConvertProjMatrixToD3D11(Matrix4x4 &in) {
in.translateAndScale(trans, scale);
}
// Computes the range outside of which vertices are culled, expressed in the
// coordinate space produced by the (adjusted) projection, by mapping the PSP's
// screen-space limits back through the viewport transform.
//
// minValues  - receives { minX, minY, minZ, depthClampFlag }; the flag is 1.0f
//              when gstate.isDepthClampEnabled() is true, else 0.0f.
// maxValues  - receives { maxX, maxY, maxZ, NAN }.
// flipViewport - when true, the Y height scale is negated before reversing the
//              viewport (callers pass this for D3D11 and GLES non-buffered).
// hasNegZ    - when true, Z is left in the range produced by reversing the
//              viewport; when false, it is remapped with * 0.5 + 0.5.
void CalcCullRange(float minValues[4], float maxValues[4], bool flipViewport, bool hasNegZ) {
	// Account for the projection viewport adjustment when viewport is too large.
	// NOTE(review): the per-backend code this replaces in the DX9 and GLES shader
	// managers also subtracted gstate_c.vpXOffset / vpYOffset here - confirm
	// whether the offsets should be applied in this shared version as well.
	auto reverseViewportX = [](float x) {
		float pspViewport = (x - gstate.getViewportXCenter()) * (1.0f / gstate.getViewportXScale());
		return pspViewport * (1.0f / gstate_c.vpWidthScale);
	};
	auto reverseViewportY = [flipViewport](float y) {
		float heightScale = gstate_c.vpHeightScale;
		if (flipViewport) {
			// For D3D11 and GLES non-buffered.
			heightScale = -heightScale;
		}
		float pspViewport = (y - gstate.getViewportYCenter()) * (1.0f / gstate.getViewportYScale());
		// Must divide by the (possibly negated) local scale; using
		// gstate_c.vpHeightScale directly would silently ignore flipViewport.
		return pspViewport * (1.0f / heightScale);
	};
	auto reverseViewportZ = [hasNegZ](float z) {
		float pspViewport = (z - gstate.getViewportZCenter()) * (1.0f / gstate.getViewportZScale());
		float realViewport = (pspViewport - gstate_c.vpZOffset) * (1.0f / gstate_c.vpDepthScale);
		// With hasNegZ the value is used as-is (-1 to 1 depth, as in GLES);
		// otherwise it is remapped for a 0 to 1 depth range.
		return hasNegZ ? realViewport : (realViewport * 0.5f + 0.5f);
	};
	// The scales may be negative, so order each pair as (min, max).
	auto sortPair = [](float a, float b) {
		return a > b ? std::make_pair(b, a) : std::make_pair(a, b);
	};

	// The PSP seems to use 0.12.4 for X and Y, and 0.16.0 for Z.
	// Any vertex outside this range (unless depth clamp enabled) is discarded.
	auto x = sortPair(reverseViewportX(0.0f), reverseViewportX(4096.0f));
	auto y = sortPair(reverseViewportY(0.0f), reverseViewportY(4096.0f));
	auto z = sortPair(reverseViewportZ(0.0f), reverseViewportZ(65535.5f));

	// Since we have space in w, use it to pass the depth clamp flag. We also pass NAN for w "discard".
	float clampEnable = gstate.isDepthClampEnabled() ? 1.0f : 0.0f;

	minValues[0] = x.first;
	minValues[1] = y.first;
	minValues[2] = z.first;
	minValues[3] = clampEnable;

	maxValues[0] = x.second;
	maxValues[1] = y.second;
	maxValues[2] = z.second;
	maxValues[3] = NAN;
}
void BaseUpdateUniforms(UB_VS_FS_Base *ub, uint64_t dirtyUniforms, bool flipViewport) {
if (dirtyUniforms & DIRTY_TEXENV) {
Uint8x3ToFloat4(ub->texEnvColor, gstate.texenvcolor);
@@ -193,41 +236,7 @@ void BaseUpdateUniforms(UB_VS_FS_Base *ub, uint64_t dirtyUniforms, bool flipView
}
if (dirtyUniforms & DIRTY_CULLRANGE) {
// Account for the projection viewport adjustment when viewport is too large.
auto reverseViewportX = [](float x) {
float pspViewport = (x - gstate.getViewportXCenter()) * (1.0f / gstate.getViewportXScale());
return pspViewport * (1.0f / gstate_c.vpWidthScale);
};
auto reverseViewportY = [flipViewport](float y) {
float heightScale = gstate_c.vpHeightScale;
if (flipViewport) {
// For D3D11.
heightScale = -heightScale;
}
float pspViewport = (y - gstate.getViewportYCenter()) * (1.0f / gstate.getViewportYScale());
return pspViewport * (1.0f / gstate_c.vpHeightScale);
};
auto reverseViewportZ = [](float z) {
float pspViewport = (z - gstate.getViewportZCenter()) * (1.0f / gstate.getViewportZScale());
// Differs from GLES: depth is 0 to 1, not -1 to 1.
return (pspViewport - gstate_c.vpZOffset) * (1.0f / gstate_c.vpDepthScale) * 0.5f + 0.5f;
};
auto sortPair = [](float a, float b) {
return a > b ? std::make_pair(b, a) : std::make_pair(a, b);
};
// The PSP seems to use 0.12.4 for X and Y, and 0.16.0 for Z.
// Any vertex outside this range (unless depth clamp enabled) is discarded.
auto x = sortPair(reverseViewportX(0.0f), reverseViewportX(4096.0f));
auto y = sortPair(reverseViewportY(0.0f), reverseViewportY(4096.0f));
auto z = sortPair(reverseViewportZ(0.0f), reverseViewportZ(65535.5f));
// Since we have space in w, use it to pass the depth clamp flag. We also pass NAN for w "discard".
float clampEnable = gstate.isDepthClampEnabled() ? 1.0f : 0.0f;
float minValues[4]{ x.first, y.first, z.first, clampEnable };
memcpy(ub->cullRangeMin, minValues, sizeof(ub->cullRangeMin));
float maxValues[4]{ x.second, y.second, z.second, NAN };
memcpy(ub->cullRangeMax, maxValues, sizeof(ub->cullRangeMax));
CalcCullRange(ub->cullRangeMin, ub->cullRangeMax, flipViewport, false);
}
if (dirtyUniforms & DIRTY_BEZIERSPLINE) {
@@ -181,6 +181,8 @@ static const char *cb_vs_bonesStr =
R"( float4x3 u_bone[8];
)";
// Fills minValues/maxValues (4 floats each) with the X/Y/Z cull range.
// minValues[3] carries the depth clamp flag (1.0f or 0.0f) and maxValues[3] is NAN.
// flipViewport: callers pass true when the viewport is flipped vertically
// (e.g. GLES passes it for non-buffered rendering).
// hasNegZ: when false, the Z range is remapped with * 0.5 + 0.5; when true it is left as-is.
void CalcCullRange(float minValues[4], float maxValues[4], bool flipViewport, bool hasNegZ);
// Updates the fields of *ub selected by the DIRTY_* bits set in dirtyUniforms;
// flipViewport is forwarded to CalcCullRange for the cull range fields.
void BaseUpdateUniforms(UB_VS_FS_Base *ub, uint64_t dirtyUniforms, bool flipViewport);
// NOTE(review): presumably these follow the same dirty-bit pattern for the light
// and bone uniform buffers - definitions are not visible here, confirm.
void LightUpdateUniforms(UB_VS_Lights *ub, uint64_t dirtyUniforms);
void BoneUpdateUniforms(UB_VS_Bones *ub, uint64_t dirtyUniforms);
@@ -36,6 +36,7 @@
#include "GPU/Math3D.h"
#include "GPU/GPUState.h"
#include "GPU/ge_constants.h"
#include "GPU/Common/ShaderUniforms.h"
#include "GPU/Directx9/ShaderManagerDX9.h"
#include "GPU/Directx9/DrawEngineDX9.h"
#include "GPU/Directx9/FramebufferDX9.h"
@@ -448,41 +449,9 @@ void ShaderManagerDX9::VSUpdateUniforms(u64 dirtyUniforms) {
VSSetFloatUniform4(CONST_VS_DEPTHRANGE, data);
}
if (dirtyUniforms & DIRTY_CULLRANGE) {
// Account for the projection viewport adjustment when viewport is too large.
auto reverseViewportX = [](float x) {
float pspViewport = (x - gstate.getViewportXCenter()) * (1.0f / gstate.getViewportXScale());
return (pspViewport - gstate_c.vpXOffset) * (1.0f / gstate_c.vpWidthScale);
};
auto reverseViewportY = [](float y) {
float yOffset = gstate_c.vpYOffset;
if (g_Config.iRenderingMode == FB_NON_BUFFERED_MODE) {
// GL upside down is a pain as usual.
// TODO: Is this right?
yOffset = -yOffset;
}
float pspViewport = (y - gstate.getViewportYCenter()) * (1.0f / gstate.getViewportYScale());
return (pspViewport - yOffset) * (1.0f / gstate_c.vpHeightScale);
};
auto reverseViewportZ = [](float z) {
float pspViewport = (z - gstate.getViewportZCenter()) * (1.0f / gstate.getViewportZScale());
// Differs from GLES: depth is 0 to 1, not -1 to 1.
return (pspViewport - gstate_c.vpZOffset) * (1.0f / gstate_c.vpDepthScale) * 0.5f + 0.5f;
};
auto sortPair = [](float a, float b) {
return a > b ? std::make_pair(b, a) : std::make_pair(a, b);
};
// The PSP seems to use 0.12.4 for X and Y, and 0.16.0 for Z.
// Any vertex outside this range (unless depth clamp enabled) is discarded.
auto x = sortPair(reverseViewportX(0.0f), reverseViewportX(4096.0f));
auto y = sortPair(reverseViewportY(0.0f), reverseViewportY(4096.0f));
auto z = sortPair(reverseViewportZ(0.0f), reverseViewportZ(65535.5f));
// Since we have space in w, use it to pass the depth clamp flag. We also pass NAN for w "discard".
float clampEnable = gstate.isDepthClampEnabled() ? 1.0f : 0.0f;
float minValues[4]{ x.first, y.first, z.first, clampEnable };
float minValues[4], maxValues[4];
CalcCullRange(minValues, maxValues, false, false);
VSSetFloatUniform4(CONST_VS_CULLRANGEMIN, minValues);
float maxValues[4]{ x.second, y.second, z.second, NAN };
VSSetFloatUniform4(CONST_VS_CULLRANGEMAX, maxValues);
}
@@ -42,9 +42,10 @@
#include "GPU/Math3D.h"
#include "GPU/GPUState.h"
#include "GPU/ge_constants.h"
#include "GPU/Common/ShaderUniforms.h"
#include "GPU/GLES/ShaderManagerGLES.h"
#include "GPU/GLES/DrawEngineGLES.h"
#include "FramebufferManagerGLES.h"
#include "GPU/GLES/FramebufferManagerGLES.h"
Shader::Shader(GLRenderManager *render, const char *code, const std::string &desc, uint32_t glShaderType, bool useHWTransform, uint32_t attrMask, uint64_t uniformMask)
: render_(render), failed_(false), useHWTransform_(useHWTransform), attrMask_(attrMask), uniformMask_(uniformMask) {
@@ -484,39 +485,9 @@ void LinkedShader::UpdateUniforms(u32 vertType, const ShaderID &vsid) {
SetFloatUniform4(render_, &u_depthRange, data);
}
if (dirty & DIRTY_CULLRANGE) {
// Account for the projection viewport adjustment when viewport is too large.
auto reverseViewportX = [](float x) {
float pspViewport = (x - gstate.getViewportXCenter()) * (1.0f / gstate.getViewportXScale());
return (pspViewport - gstate_c.vpXOffset) * (1.0f / gstate_c.vpWidthScale);
};
auto reverseViewportY = [](float y) {
float heightScale = gstate_c.vpHeightScale;
if (g_Config.iRenderingMode == FB_NON_BUFFERED_MODE) {
// GL upside down is a pain as usual.
heightScale = -heightScale;
}
float pspViewport = (y - gstate.getViewportYCenter()) * (1.0f / gstate.getViewportYScale());
return (pspViewport - gstate_c.vpYOffset) * (1.0f / heightScale);
};
auto reverseViewportZ = [](float z) {
float pspViewport = (z - gstate.getViewportZCenter()) * (1.0f / gstate.getViewportZScale());
return (pspViewport - gstate_c.vpZOffset) * (1.0f / gstate_c.vpDepthScale);
};
auto sortPair = [](float a, float b) {
return a > b ? std::make_pair(b, a) : std::make_pair(a, b);
};
// The PSP seems to use 0.12.4 for X and Y, and 0.16.0 for Z.
// Any vertex outside this range (unless depth clamp enabled) is discarded.
auto x = sortPair(reverseViewportX(0.0f), reverseViewportX(4096.0f));
auto y = sortPair(reverseViewportY(0.0f), reverseViewportY(4096.0f));
auto z = sortPair(reverseViewportZ(0.0f), reverseViewportZ(65535.5f));
// Since we have space in w, use it to pass the depth clamp flag. We also pass NAN for w "discard".
float clampEnable = gstate.isDepthClampEnabled() ? 1.0f : 0.0f;
float minValues[4]{ x.first, y.first, z.first, clampEnable };
float minValues[4], maxValues[4];
CalcCullRange(minValues, maxValues, g_Config.iRenderingMode == FB_NON_BUFFERED_MODE, true);
SetFloatUniform4(render_, &u_cullRangeMin, minValues);
float maxValues[4]{ x.second, y.second, z.second, NAN };
SetFloatUniform4(render_, &u_cullRangeMax, maxValues);
}

0 comments on commit 52baec2

Please sign in to comment.