Permalink
Browse files

GPU: Apply color test after doubling.

  • Loading branch information...
unknownbrackets committed Sep 10, 2018
1 parent 8cdead9 commit 703181607e2319c146e940778fa12ba3ac71102e
View
@@ -233,7 +233,7 @@ void ComputeFragmentShaderID(ShaderID *id_out) {
bool enableFog = gstate.isFogEnabled() && !isModeThrough;
bool enableAlphaTest = gstate.isAlphaTestEnabled() && !IsAlphaTestTriviallyTrue();
bool enableColorTest = gstate.isColorTestEnabled() && !IsColorTestTriviallyTrue();
bool enableColorDoubling = gstate.isColorDoublingEnabled() && gstate.isTextureMapEnabled();
bool enableColorDoubling = gstate.isColorDoublingEnabled() && gstate.isTextureMapEnabled() && gstate.getTextureFunction() == GE_TEXFUNC_MODULATE;
bool doTextureProjection = (gstate.getUVGenMode() == GE_TEXMAP_TEXTURE_MATRIX && MatrixNeedsProjection(gstate.tgenMatrix));
bool doTextureAlpha = gstate.isTextureAlphaUsed();
bool doFlatShading = gstate.getShadeMode() == GE_SHADE_FLAT;
@@ -304,6 +304,11 @@ bool GenerateFragmentShaderHLSL(const FShaderID &id, char *buffer, ShaderLanguag
}
}
if (enableColorTest) {
// Color doubling happens before the color test, but we try to optimize doubling when test is off.
if (enableColorDoubling) {
WRITE(p, " v.rgb = v.rgb * 2.0;\n");
}
if (colorTestAgainstZero) {
// When testing against 0 (common), we can avoid some math.
// 0.002 is approximately half of 1.0 / 255.0.
@@ -322,28 +327,32 @@ bool GenerateFragmentShaderHLSL(const FShaderID &id, char *buffer, ShaderLanguag
if (colorTestFuncs[colorTestFunc][0] != '#') {
const char *test = colorTestFuncs[colorTestFunc];
if (lang == HLSL_D3D11) {
WRITE(p, " uint3 v_scaled = roundAndScaleTo255iv(v.rgb);\n");
WRITE(p, " uint3 v_scaled = roundAndScaleTo255iv(clamp(v.rgb, 0.0, 1.0));\n");
WRITE(p, " uint3 v_masked = v_scaled & u_alphacolormask.rgb;\n");
WRITE(p, " uint3 colorTestRef = u_alphacolorref.rgb & u_alphacolormask.rgb;\n");
// We have to test the components separately, or we get incorrect results. See #10629.
WRITE(p, " if (v_masked.r %s colorTestRef.r && v_masked.g %s colorTestRef.g && v_masked.b %s colorTestRef.b) discard;\n", test, test, test);
} else {
// TODO: Use a texture to lookup bitwise ops instead?
WRITE(p, " float3 colortest = roundAndScaleTo255v(v.rgb);\n");
WRITE(p, " float3 colortest = roundAndScaleTo255v(clamp(v.rgb, 0.0, 1.0));\n");
WRITE(p, " if ((colortest.r %s u_alphacolorref.r) && (colortest.g %s u_alphacolorref.g) && (colortest.b %s u_alphacolorref.b)) clip(-1);\n", test, test, test);
}
}
else {
WRITE(p, lang == HLSL_DX9 ? " clip(-1);\n" : " discard;\n");
}
}
}
// Color doubling happens after the color test.
if (enableColorDoubling && replaceBlend == REPLACE_BLEND_2X_SRC) {
WRITE(p, " v.rgb = v.rgb * 4.0;\n");
} else if (enableColorDoubling || replaceBlend == REPLACE_BLEND_2X_SRC) {
WRITE(p, " v.rgb = v.rgb * 2.0;\n");
if (replaceBlend == REPLACE_BLEND_2X_SRC) {
WRITE(p, " v.rgb = v.rgb * 2.0;\n");
}
} else {
// If there's no color test, we can potentially double and replace blend at once.
if (enableColorDoubling && replaceBlend == REPLACE_BLEND_2X_SRC) {
WRITE(p, " v.rgb = v.rgb * 4.0;\n");
} else if (enableColorDoubling || replaceBlend == REPLACE_BLEND_2X_SRC) {
WRITE(p, " v.rgb = v.rgb * 2.0;\n");
}
}
if (enableFog) {
@@ -558,6 +558,14 @@ bool GenerateFragmentShader(const FShaderID &id, char *buffer, uint64_t *uniform
}
if (enableColorTest) {
// Color doubling happens before the color test, but we try to optimize doubling when test is off.
if (enableColorDoubling) {
WRITE(p, " v.rgb = v.rgb * 2.0;\n");
if (g_Config.bFragmentTestCache && !colorTestAgainstZero) {
WRITE(p, " vScale256.rgb = vScale256.rgb * 2.0;\n");
}
}
if (colorTestAgainstZero) {
// When testing against 0 (common), we can avoid some math.
// 0.002 is approximately half of 1.0 / 255.0.
@@ -576,7 +584,7 @@ bool GenerateFragmentShader(const FShaderID &id, char *buffer, uint64_t *uniform
WRITE(p, " float gResult = %s(testtex, vec2(vScale256.g, 0)).g;\n", texture);
WRITE(p, " float bResult = %s(testtex, vec2(vScale256.b, 0)).b;\n", texture);
if (colorTestFunc == GE_COMP_EQUAL) {
// Equal means all parts must be equal.
// Equal means all parts must be equal (so discard if any is not.)
WRITE(p, " if (rResult < 0.5 || gResult < 0.5 || bResult < 0.5) %s\n", discardStatement);
} else {
// Not equal means any part must be not equal.
@@ -587,7 +595,7 @@ bool GenerateFragmentShader(const FShaderID &id, char *buffer, uint64_t *uniform
if (colorTestFuncs[colorTestFunc][0] != '#') {
if (bitwiseOps) {
// Apparently GLES3 does not support vector bitwise ops.
WRITE(p, " ivec3 v_scaled = roundAndScaleTo255iv(v.rgb);\n");
WRITE(p, " ivec3 v_scaled = roundAndScaleTo255iv(clamp(v.rgb, 0.0, 1.0));\n");
const char *maskedFragColor = "ivec3(v_scaled.r & u_alphacolormask.r, v_scaled.g & u_alphacolormask.g, v_scaled.b & u_alphacolormask.b)";
const char *maskedColorRef = "ivec3(int(u_alphacolorref.r) & u_alphacolormask.r, int(u_alphacolorref.g) & u_alphacolormask.g, int(u_alphacolorref.b) & u_alphacolormask.b)";
WRITE(p, " if (%s %s %s) %s\n", maskedFragColor, colorTestFuncs[colorTestFunc], maskedColorRef, discardStatement);
@@ -600,13 +608,17 @@ bool GenerateFragmentShader(const FShaderID &id, char *buffer, uint64_t *uniform
WRITE(p, " %s\n", discardStatement);
}
}
}
// Color doubling happens after the color test.
if (enableColorDoubling && replaceBlend == REPLACE_BLEND_2X_SRC) {
WRITE(p, " v.rgb = v.rgb * 4.0;\n");
} else if (enableColorDoubling || replaceBlend == REPLACE_BLEND_2X_SRC) {
WRITE(p, " v.rgb = v.rgb * 2.0;\n");
if (replaceBlend == REPLACE_BLEND_2X_SRC) {
WRITE(p, " v.rgb = v.rgb * 2.0;\n");
}
} else {
// If there's no color test, we can potentially double and replace blend at once.
if (enableColorDoubling && replaceBlend == REPLACE_BLEND_2X_SRC) {
WRITE(p, " v.rgb = v.rgb * 4.0;\n");
} else if (enableColorDoubling || replaceBlend == REPLACE_BLEND_2X_SRC) {
WRITE(p, " v.rgb = v.rgb * 2.0;\n");
}
}
if (enableFog) {
@@ -81,6 +81,8 @@ void FragmentTestCacheGLES::BindTestTexture(int slot) {
GLRTexture *tex = CreateTestTexture(funcs, refs, masks, valid);
lastTexture_ = tex;
render_->BindTexture(slot, tex);
// We only need to do this once for the texture.
render_->SetTextureSampler(slot, GL_CLAMP_TO_EDGE, GL_CLAMP_TO_EDGE, GL_NEAREST, GL_NEAREST, 0.0f);
FragmentTestTexture item;
item.lastFrame = gpuStats.numFlips;
item.texture = tex;
@@ -622,15 +622,25 @@ static inline Vec4<int> GetTextureFunctionOutput(const Vec4<int>& prim_color, co
// We can be accurate up to 24 bit integers, should be enough.
const __m128 p = _mm_cvtepi32_ps(prim_color.ivec);
const __m128 t = _mm_cvtepi32_ps(texcolor.ivec);
out_rgb.ivec = _mm_cvtps_epi32(_mm_div_ps(_mm_mul_ps(p, t), _mm_set_ps1(255.0f)));
const __m128 b = _mm_mul_ps(p, t);
if (gstate.isColorDoublingEnabled()) {
// We double right here, only for modulate. Other tex funcs do not color double.
out_rgb.ivec = _mm_cvtps_epi32(_mm_mul_ps(b, _mm_set_ps1(2.0f / 255.0f)));
} else {
out_rgb.ivec = _mm_cvtps_epi32(_mm_mul_ps(b, _mm_set_ps1(1.0f / 255.0f)));
}
if (rgba) {
return Vec4<int>(out_rgb.ivec);
} else {
out_a = prim_color.a();
}
#else
out_rgb = prim_color.rgb() * texcolor.rgb() / 255;
if (gstate.isColorDoublingEnabled()) {
out_rgb = (prim_color.rgb() * texcolor.rgb() * 2) / 255;
} else {
out_rgb = prim_color.rgb() * texcolor.rgb() / 255;
}
out_a = (rgba) ? (prim_color.a() * texcolor.a() / 255) : prim_color.a();
#endif
break;
@@ -894,7 +904,7 @@ static inline Vec3<int> AlphaBlendingResult(const Vec4<int> &source, const Vec4<
template <bool clearMode>
inline void DrawSinglePixel(const DrawingCoords &p, u16 z, u8 fog, const Vec4<int> &color_in) {
Vec4<int> prim_color = color_in;
Vec4<int> prim_color = color_in.Clamp(0, 255);
// Depth range test - applied in clear mode, if not through mode.
if (!gstate.isModeThrough())
if (z < gstate.getDepthRangeMin() || z > gstate.getDepthRangeMax())
@@ -935,14 +945,6 @@ inline void DrawSinglePixel(const DrawingCoords &p, u16 z, u8 fog, const Vec4<in
SetPixelDepth(p.x, p.y, z);
}
// Doubling happens only when texturing is enabled, and after tests.
if (gstate.isTextureMapEnabled() && gstate.isColorDoublingEnabled() && !clearMode) {
// TODO: Does this need to be clamped before blending?
prim_color.r() <<= 1;
prim_color.g() <<= 1;
prim_color.b() <<= 1;
}
if (gstate.isFogEnabled() && !gstate.isModeThrough() && !clearMode) {
Vec3<int> fogColor = Vec3<int>::FromRGB(gstate.fogcolor);
fogColor = (prim_color.rgb() * (int)fog + fogColor * (255 - (int)fog)) / 255;
@@ -375,6 +375,11 @@ bool GenerateVulkanGLSLFragmentShader(const FShaderID &id, char *buffer) {
}
if (enableColorTest) {
// Color doubling happens before the color test, but we try to optimize doubling when test is off.
if (enableColorDoubling) {
WRITE(p, " v.rgb = v.rgb * 2.0;\n");
}
if (colorTestAgainstZero) {
// When testing against 0 (common), we can avoid some math.
// Have my doubts that this special case is actually worth it, but whatever.
@@ -392,19 +397,23 @@ bool GenerateVulkanGLSLFragmentShader(const FShaderID &id, char *buffer) {
} else {
const char *colorTestFuncs[] = { "#", "#", " != ", " == " };
if (colorTestFuncs[colorTestFunc][0] != '#') {
WRITE(p, " ivec3 v_scaled = roundAndScaleTo255iv(v.rgb);\n");
WRITE(p, " ivec3 v_scaled = roundAndScaleTo255iv(clamp(v.rgb, 0.0, 1.0));\n");
WRITE(p, " if ((v_scaled & base.alphacolormask.rgb) %s (base.alphacolorref.rgb & base.alphacolormask.rgb)) %s\n", colorTestFuncs[colorTestFunc], discardStatement);
} else {
WRITE(p, " %s\n", discardStatement);
}
}
}
// Color doubling happens after the color test.
if (enableColorDoubling && replaceBlend == REPLACE_BLEND_2X_SRC) {
WRITE(p, " v.rgb = v.rgb * 4.0;\n");
} else if (enableColorDoubling || replaceBlend == REPLACE_BLEND_2X_SRC) {
WRITE(p, " v.rgb = v.rgb * 2.0;\n");
if (replaceBlend == REPLACE_BLEND_2X_SRC) {
WRITE(p, " v.rgb = v.rgb * 2.0;\n");
}
} else {
// If there's no color test, we can potentially double and replace blend at once.
if (enableColorDoubling && replaceBlend == REPLACE_BLEND_2X_SRC) {
WRITE(p, " v.rgb = v.rgb * 4.0;\n");
} else if (enableColorDoubling || replaceBlend == REPLACE_BLEND_2X_SRC) {
WRITE(p, " v.rgb = v.rgb * 2.0;\n");
}
}
if (enableFog) {
View
@@ -373,6 +373,7 @@ int main(int argc, const char* argv[])
g_Config.iSplineBezierQuality = 2;
g_Config.bHighQualityDepth = true;
g_Config.bMemStickInserted = true;
g_Config.bFragmentTestCache = true;
#ifdef _WIN32
InitSysDirectories();

0 comments on commit 7031816

Please sign in to comment.