Skip to content
Permalink
Browse files

Merge pull request #11379 from unknownbrackets/softgpu

Correct interactions between fog, doubling, and color testing
  • Loading branch information...
hrydgard committed Sep 10, 2018
2 parents ae73b8f + 817b5d7 commit e2a9f6acdd74904a4131aa8d7bb620441d2b6729
@@ -233,7 +233,7 @@ void ComputeFragmentShaderID(ShaderID *id_out) {
bool enableFog = gstate.isFogEnabled() && !isModeThrough;
bool enableAlphaTest = gstate.isAlphaTestEnabled() && !IsAlphaTestTriviallyTrue();
bool enableColorTest = gstate.isColorTestEnabled() && !IsColorTestTriviallyTrue();
bool enableColorDoubling = gstate.isColorDoublingEnabled() && gstate.isTextureMapEnabled();
bool enableColorDoubling = gstate.isColorDoublingEnabled() && gstate.isTextureMapEnabled() && gstate.getTextureFunction() == GE_TEXFUNC_MODULATE;
bool doTextureProjection = (gstate.getUVGenMode() == GE_TEXMAP_TEXTURE_MATRIX && MatrixNeedsProjection(gstate.tgenMatrix));
bool doTextureAlpha = gstate.isTextureAlphaUsed();
bool doFlatShading = gstate.getShadeMode() == GE_SHADE_FLAT;
@@ -268,6 +268,11 @@ bool GenerateFragmentShaderHLSL(const FShaderID &id, char *buffer, ShaderLanguag
WRITE(p, " float4 v = p;\n"); break;
}
}

if (enableColorDoubling) {
// This happens before fog is applied.
WRITE(p, " v.rgb = clamp(v.rgb * 2.0, 0.0, 1.0);\n");
}
} else {
// No texture mapping
WRITE(p, " float4 v = In.v_color0 %s;\n", secondary);
@@ -303,6 +308,12 @@ bool GenerateFragmentShaderHLSL(const FShaderID &id, char *buffer, ShaderLanguag
}
}
}

if (enableFog) {
WRITE(p, " float fogCoef = clamp(In.v_fogdepth, 0.0, 1.0);\n");
WRITE(p, " v = lerp(float4(u_fogcolor, v.a), v, fogCoef);\n");
}

if (enableColorTest) {
if (colorTestAgainstZero) {
// When testing against 0 (common), we can avoid some math.
@@ -339,18 +350,10 @@ bool GenerateFragmentShaderHLSL(const FShaderID &id, char *buffer, ShaderLanguag
}
}

// Color doubling happens after the color test.
if (enableColorDoubling && replaceBlend == REPLACE_BLEND_2X_SRC) {
WRITE(p, " v.rgb = v.rgb * 4.0;\n");
} else if (enableColorDoubling || replaceBlend == REPLACE_BLEND_2X_SRC) {
if (replaceBlend == REPLACE_BLEND_2X_SRC) {
WRITE(p, " v.rgb = v.rgb * 2.0;\n");
}

if (enableFog) {
WRITE(p, " float fogCoef = clamp(In.v_fogdepth, 0.0, 1.0);\n");
WRITE(p, " v = lerp(float4(u_fogcolor, v.a), v, fogCoef);\n");
}

if (replaceBlend == REPLACE_BLEND_PRE_SRC || replaceBlend == REPLACE_BLEND_PRE_SRC_2X_ALPHA) {
const char *srcFactor = "ERROR";
switch (replaceBlendFuncA) {
@@ -500,11 +500,22 @@ bool GenerateFragmentShader(const FShaderID &id, char *buffer, uint64_t *uniform
WRITE(p, " vec4 v = p;\n"); break;
}
}

if (enableColorDoubling) {
// This happens before fog is applied.
WRITE(p, " v.rgb = clamp(v.rgb * 2.0, 0.0, 1.0);\n");
}
} else {
// No texture mapping
WRITE(p, " vec4 v = v_color0 %s;\n", secondary);
}

if (enableFog) {
WRITE(p, " float fogCoef = clamp(v_fogdepth, 0.0, 1.0);\n");
WRITE(p, " v = mix(vec4(u_fogcolor, v.a), v, fogCoef);\n");
// WRITE(p, " v.x = v_depth;\n");
}

// Texture access is at half texels [0.5/256, 255.5/256], but colors are normalized [0, 255].
// So we have to scale to account for the difference.
std::string alphaTestXCoord = "0";
@@ -576,7 +587,7 @@ bool GenerateFragmentShader(const FShaderID &id, char *buffer, uint64_t *uniform
WRITE(p, " float gResult = %s(testtex, vec2(vScale256.g, 0)).g;\n", texture);
WRITE(p, " float bResult = %s(testtex, vec2(vScale256.b, 0)).b;\n", texture);
if (colorTestFunc == GE_COMP_EQUAL) {
// Equal means all parts must be equal.
// Equal means all parts must be equal (so discard if any part is not).
WRITE(p, " if (rResult < 0.5 || gResult < 0.5 || bResult < 0.5) %s\n", discardStatement);
} else {
// Not equal means any part must be not equal.
@@ -602,19 +613,10 @@ bool GenerateFragmentShader(const FShaderID &id, char *buffer, uint64_t *uniform
}
}

// Color doubling happens after the color test.
if (enableColorDoubling && replaceBlend == REPLACE_BLEND_2X_SRC) {
WRITE(p, " v.rgb = v.rgb * 4.0;\n");
} else if (enableColorDoubling || replaceBlend == REPLACE_BLEND_2X_SRC) {
if (replaceBlend == REPLACE_BLEND_2X_SRC) {
WRITE(p, " v.rgb = v.rgb * 2.0;\n");
}

if (enableFog) {
WRITE(p, " float fogCoef = clamp(v_fogdepth, 0.0, 1.0);\n");
WRITE(p, " v = mix(vec4(u_fogcolor, v.a), v, fogCoef);\n");
// WRITE(p, " v.x = v_depth;\n");
}

if (replaceBlend == REPLACE_BLEND_PRE_SRC || replaceBlend == REPLACE_BLEND_PRE_SRC_2X_ALPHA) {
const char *srcFactor = "ERROR";
switch (replaceBlendFuncA) {
@@ -81,6 +81,8 @@ void FragmentTestCacheGLES::BindTestTexture(int slot) {
GLRTexture *tex = CreateTestTexture(funcs, refs, masks, valid);
lastTexture_ = tex;
render_->BindTexture(slot, tex);
// We only need to do this once for the texture.
render_->SetTextureSampler(slot, GL_CLAMP_TO_EDGE, GL_CLAMP_TO_EDGE, GL_NEAREST, GL_NEAREST, 0.0f);
FragmentTestTexture item;
item.lastFrame = gpuStats.numFlips;
item.texture = tex;
@@ -157,7 +157,7 @@ void ProcessRect(const VertexData& v0, const VertexData& v1)
// Color and depth values of second vertex are used for the whole rectangle
buf[0].color0 = buf[1].color0 = buf[2].color0 = buf[3].color0;
buf[0].color1 = buf[1].color1 = buf[2].color1 = buf[3].color1;
buf[0].fogdepth = buf[1].fogdepth = buf[2].fogdepth = buf[3].fogdepth = 1.0f;
buf[0].fogdepth = buf[1].fogdepth = buf[2].fogdepth = buf[3].fogdepth;

VertexData* topleft = &buf[0];
VertexData* topright = &buf[1];
@@ -484,9 +484,8 @@ static inline bool StencilTestPassed(u8 stencil)
return true;
}

static inline u8 ApplyStencilOp(int op, int x, int y)
{
u8 old_stencil = GetPixelStencil(x, y); // TODO: Apply mask?
static inline u8 ApplyStencilOp(int op, u8 old_stencil) {
// TODO: Apply mask to reference or old stencil?
u8 reference_stencil = gstate.getStencilTestRef(); // TODO: Apply mask?

switch (op) {
@@ -538,71 +537,71 @@ static inline u8 ApplyStencilOp(int op, int x, int y)
return old_stencil;
}

static inline u32 ApplyLogicOp(GELogicOp op, u32 old_color, u32 new_color)
{
static inline u32 ApplyLogicOp(GELogicOp op, u32 old_color, u32 new_color) {
// All of the operations here intentionally preserve alpha/stencil.
switch (op) {
case GE_LOGIC_CLEAR:
new_color = 0;
new_color &= 0xFF000000;
break;

case GE_LOGIC_AND:
new_color = new_color & old_color;
new_color = new_color & (old_color | 0xFF000000);
break;

case GE_LOGIC_AND_REVERSE:
new_color = new_color & ~old_color;
new_color = new_color & (~old_color | 0xFF000000);
break;

case GE_LOGIC_COPY:
//new_color = new_color;
// No change to new_color.
break;

case GE_LOGIC_AND_INVERTED:
new_color = ~new_color & old_color;
new_color = (~new_color & (old_color & 0x00FFFFFF)) | (new_color & 0xFF000000);
break;

case GE_LOGIC_NOOP:
new_color = old_color;
new_color = (old_color & 0x00FFFFFF) | (new_color & 0xFF000000);
break;

case GE_LOGIC_XOR:
new_color = new_color ^ old_color;
new_color = new_color ^ (old_color & 0x00FFFFFF);
break;

case GE_LOGIC_OR:
new_color = new_color | old_color;
new_color = new_color | (old_color & 0x00FFFFFF);
break;

case GE_LOGIC_NOR:
new_color = ~(new_color | old_color);
new_color = (~(new_color | old_color) & 0x00FFFFFF) | (new_color & 0xFF000000);
break;

case GE_LOGIC_EQUIV:
new_color = ~(new_color ^ old_color);
new_color = (~(new_color ^ old_color) & 0x00FFFFFF) | (new_color & 0xFF000000);
break;

case GE_LOGIC_INVERTED:
new_color = ~old_color;
new_color = (~old_color & 0x00FFFFFF) | (new_color & 0xFF000000);
break;

case GE_LOGIC_OR_REVERSE:
new_color = new_color | ~old_color;
new_color = new_color | (~old_color & 0x00FFFFFF);
break;

case GE_LOGIC_COPY_INVERTED:
new_color = ~new_color;
new_color = (~new_color & 0x00FFFFFF) | (new_color & 0xFF000000);
break;

case GE_LOGIC_OR_INVERTED:
new_color = ~new_color | old_color;
new_color = ((~new_color | old_color) & 0x00FFFFFF) | (new_color & 0xFF000000);
break;

case GE_LOGIC_NAND:
new_color = ~(new_color & old_color);
new_color = (~(new_color & old_color) & 0x00FFFFFF) | (new_color & 0xFF000000);
break;

case GE_LOGIC_SET:
new_color = 0xFFFFFFFF;
new_color |= 0x00FFFFFF;
break;
}

@@ -623,15 +622,25 @@ static inline Vec4<int> GetTextureFunctionOutput(const Vec4<int>& prim_color, co
// We can be accurate up to 24 bit integers, should be enough.
const __m128 p = _mm_cvtepi32_ps(prim_color.ivec);
const __m128 t = _mm_cvtepi32_ps(texcolor.ivec);
out_rgb.ivec = _mm_cvtps_epi32(_mm_div_ps(_mm_mul_ps(p, t), _mm_set_ps1(255.0f)));
const __m128 b = _mm_mul_ps(p, t);
if (gstate.isColorDoublingEnabled()) {
// We double right here, only for modulate. Other tex funcs do not color double.
out_rgb.ivec = _mm_cvtps_epi32(_mm_mul_ps(b, _mm_set_ps1(2.0f / 255.0f)));
} else {
out_rgb.ivec = _mm_cvtps_epi32(_mm_mul_ps(b, _mm_set_ps1(1.0f / 255.0f)));
}

if (rgba) {
return Vec4<int>(out_rgb.ivec);
} else {
out_a = prim_color.a();
}
#else
out_rgb = prim_color.rgb() * texcolor.rgb() / 255;
if (gstate.isColorDoublingEnabled()) {
out_rgb = (prim_color.rgb() * texcolor.rgb() * 2) / 255;
} else {
out_rgb = prim_color.rgb() * texcolor.rgb() / 255;
}
out_a = (rgba) ? (prim_color.a() * texcolor.a() / 255) : prim_color.a();
#endif
break;
@@ -895,41 +904,47 @@ static inline Vec3<int> AlphaBlendingResult(const Vec4<int> &source, const Vec4<

template <bool clearMode>
inline void DrawSinglePixel(const DrawingCoords &p, u16 z, u8 fog, const Vec4<int> &color_in) {
Vec4<int> prim_color = color_in;
// Depth range test
// TODO: Clear mode?
Vec4<int> prim_color = color_in.Clamp(0, 255);
// Depth range test - applied in clear mode, if not through mode.
if (!gstate.isModeThrough())
if (z < gstate.getDepthRangeMin() || z > gstate.getDepthRangeMax())
return;

if (gstate.isColorTestEnabled() && !clearMode)
if (!ColorTestPassed(prim_color.rgb()))
return;

// TODO: Does a need to be clamped?
if (gstate.isAlphaTestEnabled() && !clearMode)
if (!AlphaTestPassed(prim_color.a()))
return;

// Fog is applied prior to color test.
if (gstate.isFogEnabled() && !gstate.isModeThrough() && !clearMode) {
Vec3<int> fogColor = Vec3<int>::FromRGB(gstate.fogcolor);
fogColor = (prim_color.rgb() * (int)fog + fogColor * (255 - (int)fog)) / 255;
prim_color.r() = fogColor.r();
prim_color.g() = fogColor.g();
prim_color.b() = fogColor.b();
}

if (gstate.isColorTestEnabled() && !clearMode)
if (!ColorTestPassed(prim_color.rgb()))
return;

// In clear mode, it uses the alpha color as stencil.
u8 stencil = clearMode ? prim_color.a() : GetPixelStencil(p.x, p.y);
// TODO: Is it safe to ignore gstate.isDepthTestEnabled() when clear mode is enabled? Probably yes
if (!clearMode && (gstate.isStencilTestEnabled() || gstate.isDepthTestEnabled())) {
if (gstate.isStencilTestEnabled() && !StencilTestPassed(stencil)) {
stencil = ApplyStencilOp(gstate.getStencilOpSFail(), p.x, p.y);
stencil = ApplyStencilOp(gstate.getStencilOpSFail(), stencil);
SetPixelStencil(p.x, p.y, stencil);
return;
}

// Also apply depth at the same time. If disabled, same as passing.
if (gstate.isDepthTestEnabled() && !DepthTestPassed(p.x, p.y, z)) {
if (gstate.isStencilTestEnabled()) {
stencil = ApplyStencilOp(gstate.getStencilOpZFail(), p.x, p.y);
stencil = ApplyStencilOp(gstate.getStencilOpZFail(), stencil);
SetPixelStencil(p.x, p.y, stencil);
}
return;
} else if (gstate.isStencilTestEnabled()) {
stencil = ApplyStencilOp(gstate.getStencilOpZPass(), p.x, p.y);
stencil = ApplyStencilOp(gstate.getStencilOpZPass(), stencil);
}

if (gstate.isDepthTestEnabled() && gstate.isDepthWriteEnabled()) {
@@ -939,28 +954,12 @@ inline void DrawSinglePixel(const DrawingCoords &p, u16 z, u8 fog, const Vec4<in
SetPixelDepth(p.x, p.y, z);
}

// Doubling happens only when texturing is enabled, and after tests.
if (gstate.isTextureMapEnabled() && gstate.isColorDoublingEnabled() && !clearMode) {
// TODO: Does this need to be clamped before blending?
prim_color.r() <<= 1;
prim_color.g() <<= 1;
prim_color.b() <<= 1;
}

if (gstate.isFogEnabled() && !gstate.isModeThrough() && !clearMode) {
Vec3<int> fogColor = Vec3<int>::FromRGB(gstate.fogcolor);
fogColor = (prim_color.rgb() * (int)fog + fogColor * (255 - (int)fog)) / 255;
prim_color.r() = fogColor.r();
prim_color.g() = fogColor.g();
prim_color.b() = fogColor.b();
}

const u32 old_color = GetPixelColor(p.x, p.y);
u32 new_color;

if (gstate.isAlphaBlendEnabled() && !clearMode) {
const Vec4<int> dst = Vec4<int>::FromRGBA(old_color);
// ToRGBA() always automatically clamps.
// ToRGB() always automatically clamps.
new_color = AlphaBlendingResult(prim_color, dst).ToRGB();
new_color |= stencil << 24;
} else {
@@ -974,8 +973,8 @@ inline void DrawSinglePixel(const DrawingCoords &p, u16 z, u8 fog, const Vec4<in

// Logic ops are applied after blending (if blending is enabled.)
if (gstate.isLogicOpEnabled() && !clearMode) {
// Logic ops don't affect stencil.
new_color = (stencil << 24) | (ApplyLogicOp(gstate.getLogicOp(), old_color, new_color) & 0x00FFFFFF);
// Logic ops don't affect stencil; ApplyLogicOp itself preserves the alpha/stencil bits.
new_color = ApplyLogicOp(gstate.getLogicOp(), old_color, new_color);
}

if (clearMode) {
Oops, something went wrong.

0 comments on commit e2a9f6a

Please sign in to comment.
You can’t perform that action at this time.