Skip to content

Commit

Permalink
Merge pull request #11379 from unknownbrackets/softgpu
Browse files Browse the repository at this point in the history
Correct interactions between fog, doubling, and color testing
  • Loading branch information
hrydgard committed Sep 10, 2018
2 parents ae73b8f + 817b5d7 commit e2a9f6a
Show file tree
Hide file tree
Showing 8 changed files with 95 additions and 86 deletions.
2 changes: 1 addition & 1 deletion GPU/Common/ShaderId.cpp
Expand Up @@ -233,7 +233,7 @@ void ComputeFragmentShaderID(ShaderID *id_out) {
bool enableFog = gstate.isFogEnabled() && !isModeThrough; bool enableFog = gstate.isFogEnabled() && !isModeThrough;
bool enableAlphaTest = gstate.isAlphaTestEnabled() && !IsAlphaTestTriviallyTrue(); bool enableAlphaTest = gstate.isAlphaTestEnabled() && !IsAlphaTestTriviallyTrue();
bool enableColorTest = gstate.isColorTestEnabled() && !IsColorTestTriviallyTrue(); bool enableColorTest = gstate.isColorTestEnabled() && !IsColorTestTriviallyTrue();
bool enableColorDoubling = gstate.isColorDoublingEnabled() && gstate.isTextureMapEnabled(); bool enableColorDoubling = gstate.isColorDoublingEnabled() && gstate.isTextureMapEnabled() && gstate.getTextureFunction() == GE_TEXFUNC_MODULATE;
bool doTextureProjection = (gstate.getUVGenMode() == GE_TEXMAP_TEXTURE_MATRIX && MatrixNeedsProjection(gstate.tgenMatrix)); bool doTextureProjection = (gstate.getUVGenMode() == GE_TEXMAP_TEXTURE_MATRIX && MatrixNeedsProjection(gstate.tgenMatrix));
bool doTextureAlpha = gstate.isTextureAlphaUsed(); bool doTextureAlpha = gstate.isTextureAlphaUsed();
bool doFlatShading = gstate.getShadeMode() == GE_SHADE_FLAT; bool doFlatShading = gstate.getShadeMode() == GE_SHADE_FLAT;
Expand Down
21 changes: 12 additions & 9 deletions GPU/Directx9/PixelShaderGeneratorDX9.cpp
Expand Up @@ -268,6 +268,11 @@ bool GenerateFragmentShaderHLSL(const FShaderID &id, char *buffer, ShaderLanguag
WRITE(p, " float4 v = p;\n"); break; WRITE(p, " float4 v = p;\n"); break;
} }
} }

if (enableColorDoubling) {
// This happens before fog is applied.
WRITE(p, " v.rgb = clamp(v.rgb * 2.0, 0.0, 1.0);\n");
}
} else { } else {
// No texture mapping // No texture mapping
WRITE(p, " float4 v = In.v_color0 %s;\n", secondary); WRITE(p, " float4 v = In.v_color0 %s;\n", secondary);
Expand Down Expand Up @@ -303,6 +308,12 @@ bool GenerateFragmentShaderHLSL(const FShaderID &id, char *buffer, ShaderLanguag
} }
} }
} }

if (enableFog) {
WRITE(p, " float fogCoef = clamp(In.v_fogdepth, 0.0, 1.0);\n");
WRITE(p, " v = lerp(float4(u_fogcolor, v.a), v, fogCoef);\n");
}

if (enableColorTest) { if (enableColorTest) {
if (colorTestAgainstZero) { if (colorTestAgainstZero) {
// When testing against 0 (common), we can avoid some math. // When testing against 0 (common), we can avoid some math.
Expand Down Expand Up @@ -339,18 +350,10 @@ bool GenerateFragmentShaderHLSL(const FShaderID &id, char *buffer, ShaderLanguag
} }
} }


// Color doubling happens after the color test. if (replaceBlend == REPLACE_BLEND_2X_SRC) {
if (enableColorDoubling && replaceBlend == REPLACE_BLEND_2X_SRC) {
WRITE(p, " v.rgb = v.rgb * 4.0;\n");
} else if (enableColorDoubling || replaceBlend == REPLACE_BLEND_2X_SRC) {
WRITE(p, " v.rgb = v.rgb * 2.0;\n"); WRITE(p, " v.rgb = v.rgb * 2.0;\n");
} }


if (enableFog) {
WRITE(p, " float fogCoef = clamp(In.v_fogdepth, 0.0, 1.0);\n");
WRITE(p, " v = lerp(float4(u_fogcolor, v.a), v, fogCoef);\n");
}

if (replaceBlend == REPLACE_BLEND_PRE_SRC || replaceBlend == REPLACE_BLEND_PRE_SRC_2X_ALPHA) { if (replaceBlend == REPLACE_BLEND_PRE_SRC || replaceBlend == REPLACE_BLEND_PRE_SRC_2X_ALPHA) {
const char *srcFactor = "ERROR"; const char *srcFactor = "ERROR";
switch (replaceBlendFuncA) { switch (replaceBlendFuncA) {
Expand Down
24 changes: 13 additions & 11 deletions GPU/GLES/FragmentShaderGeneratorGLES.cpp
Expand Up @@ -500,11 +500,22 @@ bool GenerateFragmentShader(const FShaderID &id, char *buffer, uint64_t *uniform
WRITE(p, " vec4 v = p;\n"); break; WRITE(p, " vec4 v = p;\n"); break;
} }
} }

if (enableColorDoubling) {
// This happens before fog is applied.
WRITE(p, " v.rgb = clamp(v.rgb * 2.0, 0.0, 1.0);\n");
}
} else { } else {
// No texture mapping // No texture mapping
WRITE(p, " vec4 v = v_color0 %s;\n", secondary); WRITE(p, " vec4 v = v_color0 %s;\n", secondary);
} }


if (enableFog) {
WRITE(p, " float fogCoef = clamp(v_fogdepth, 0.0, 1.0);\n");
WRITE(p, " v = mix(vec4(u_fogcolor, v.a), v, fogCoef);\n");
// WRITE(p, " v.x = v_depth;\n");
}

// Texture access is at half texels [0.5/256, 255.5/256], but colors are normalized [0, 255]. // Texture access is at half texels [0.5/256, 255.5/256], but colors are normalized [0, 255].
// So we have to scale to account for the difference. // So we have to scale to account for the difference.
std::string alphaTestXCoord = "0"; std::string alphaTestXCoord = "0";
Expand Down Expand Up @@ -576,7 +587,7 @@ bool GenerateFragmentShader(const FShaderID &id, char *buffer, uint64_t *uniform
WRITE(p, " float gResult = %s(testtex, vec2(vScale256.g, 0)).g;\n", texture); WRITE(p, " float gResult = %s(testtex, vec2(vScale256.g, 0)).g;\n", texture);
WRITE(p, " float bResult = %s(testtex, vec2(vScale256.b, 0)).b;\n", texture); WRITE(p, " float bResult = %s(testtex, vec2(vScale256.b, 0)).b;\n", texture);
if (colorTestFunc == GE_COMP_EQUAL) { if (colorTestFunc == GE_COMP_EQUAL) {
// Equal means all parts must be equal. // Equal means all parts must be equal (so discard if any is not.)
WRITE(p, " if (rResult < 0.5 || gResult < 0.5 || bResult < 0.5) %s\n", discardStatement); WRITE(p, " if (rResult < 0.5 || gResult < 0.5 || bResult < 0.5) %s\n", discardStatement);
} else { } else {
// Not equal means any part must be not equal. // Not equal means any part must be not equal.
Expand All @@ -602,19 +613,10 @@ bool GenerateFragmentShader(const FShaderID &id, char *buffer, uint64_t *uniform
} }
} }


// Color doubling happens after the color test. if (replaceBlend == REPLACE_BLEND_2X_SRC) {
if (enableColorDoubling && replaceBlend == REPLACE_BLEND_2X_SRC) {
WRITE(p, " v.rgb = v.rgb * 4.0;\n");
} else if (enableColorDoubling || replaceBlend == REPLACE_BLEND_2X_SRC) {
WRITE(p, " v.rgb = v.rgb * 2.0;\n"); WRITE(p, " v.rgb = v.rgb * 2.0;\n");
} }


if (enableFog) {
WRITE(p, " float fogCoef = clamp(v_fogdepth, 0.0, 1.0);\n");
WRITE(p, " v = mix(vec4(u_fogcolor, v.a), v, fogCoef);\n");
// WRITE(p, " v.x = v_depth;\n");
}

if (replaceBlend == REPLACE_BLEND_PRE_SRC || replaceBlend == REPLACE_BLEND_PRE_SRC_2X_ALPHA) { if (replaceBlend == REPLACE_BLEND_PRE_SRC || replaceBlend == REPLACE_BLEND_PRE_SRC_2X_ALPHA) {
const char *srcFactor = "ERROR"; const char *srcFactor = "ERROR";
switch (replaceBlendFuncA) { switch (replaceBlendFuncA) {
Expand Down
2 changes: 2 additions & 0 deletions GPU/GLES/FragmentTestCacheGLES.cpp
Expand Up @@ -81,6 +81,8 @@ void FragmentTestCacheGLES::BindTestTexture(int slot) {
GLRTexture *tex = CreateTestTexture(funcs, refs, masks, valid); GLRTexture *tex = CreateTestTexture(funcs, refs, masks, valid);
lastTexture_ = tex; lastTexture_ = tex;
render_->BindTexture(slot, tex); render_->BindTexture(slot, tex);
// We only need to do this once for the texture.
render_->SetTextureSampler(slot, GL_CLAMP_TO_EDGE, GL_CLAMP_TO_EDGE, GL_NEAREST, GL_NEAREST, 0.0f);
FragmentTestTexture item; FragmentTestTexture item;
item.lastFrame = gpuStats.numFlips; item.lastFrame = gpuStats.numFlips;
item.texture = tex; item.texture = tex;
Expand Down
2 changes: 1 addition & 1 deletion GPU/Software/Clipper.cpp
Expand Up @@ -157,7 +157,7 @@ void ProcessRect(const VertexData& v0, const VertexData& v1)
// Color and depth values of second vertex are used for the whole rectangle // Color and depth values of second vertex are used for the whole rectangle
buf[0].color0 = buf[1].color0 = buf[2].color0 = buf[3].color0; buf[0].color0 = buf[1].color0 = buf[2].color0 = buf[3].color0;
buf[0].color1 = buf[1].color1 = buf[2].color1 = buf[3].color1; buf[0].color1 = buf[1].color1 = buf[2].color1 = buf[3].color1;
buf[0].fogdepth = buf[1].fogdepth = buf[2].fogdepth = buf[3].fogdepth = 1.0f; buf[0].fogdepth = buf[1].fogdepth = buf[2].fogdepth = buf[3].fogdepth;


VertexData* topleft = &buf[0]; VertexData* topleft = &buf[0];
VertexData* topright = &buf[1]; VertexData* topright = &buf[1];
Expand Down
107 changes: 53 additions & 54 deletions GPU/Software/Rasterizer.cpp
Expand Up @@ -484,9 +484,8 @@ static inline bool StencilTestPassed(u8 stencil)
return true; return true;
} }


static inline u8 ApplyStencilOp(int op, int x, int y) static inline u8 ApplyStencilOp(int op, u8 old_stencil) {
{ // TODO: Apply mask to reference or old stencil?
u8 old_stencil = GetPixelStencil(x, y); // TODO: Apply mask?
u8 reference_stencil = gstate.getStencilTestRef(); // TODO: Apply mask? u8 reference_stencil = gstate.getStencilTestRef(); // TODO: Apply mask?


switch (op) { switch (op) {
Expand Down Expand Up @@ -538,71 +537,71 @@ static inline u8 ApplyStencilOp(int op, int x, int y)
return old_stencil; return old_stencil;
} }


static inline u32 ApplyLogicOp(GELogicOp op, u32 old_color, u32 new_color) static inline u32 ApplyLogicOp(GELogicOp op, u32 old_color, u32 new_color) {
{ // All of the operations here intentionally preserve alpha/stencil.
switch (op) { switch (op) {
case GE_LOGIC_CLEAR: case GE_LOGIC_CLEAR:
new_color = 0; new_color &= 0xFF000000;
break; break;


case GE_LOGIC_AND: case GE_LOGIC_AND:
new_color = new_color & old_color; new_color = new_color & (old_color | 0xFF000000);
break; break;


case GE_LOGIC_AND_REVERSE: case GE_LOGIC_AND_REVERSE:
new_color = new_color & ~old_color; new_color = new_color & (~old_color | 0xFF000000);
break; break;


case GE_LOGIC_COPY: case GE_LOGIC_COPY:
//new_color = new_color; // No change to new_color.
break; break;


case GE_LOGIC_AND_INVERTED: case GE_LOGIC_AND_INVERTED:
new_color = ~new_color & old_color; new_color = (~new_color & (old_color & 0x00FFFFFF)) | (new_color & 0xFF000000);
break; break;


case GE_LOGIC_NOOP: case GE_LOGIC_NOOP:
new_color = old_color; new_color = (old_color & 0x00FFFFFF) | (new_color & 0xFF000000);
break; break;


case GE_LOGIC_XOR: case GE_LOGIC_XOR:
new_color = new_color ^ old_color; new_color = new_color ^ (old_color & 0x00FFFFFF);
break; break;


case GE_LOGIC_OR: case GE_LOGIC_OR:
new_color = new_color | old_color; new_color = new_color | (old_color & 0x00FFFFFF);
break; break;


case GE_LOGIC_NOR: case GE_LOGIC_NOR:
new_color = ~(new_color | old_color); new_color = (~(new_color | old_color) & 0x00FFFFFF) | (new_color & 0xFF000000);
break; break;


case GE_LOGIC_EQUIV: case GE_LOGIC_EQUIV:
new_color = ~(new_color ^ old_color); new_color = (~(new_color ^ old_color) & 0x00FFFFFF) | (new_color & 0xFF000000);
break; break;


case GE_LOGIC_INVERTED: case GE_LOGIC_INVERTED:
new_color = ~old_color; new_color = (~old_color & 0x00FFFFFF) | (new_color & 0xFF000000);
break; break;


case GE_LOGIC_OR_REVERSE: case GE_LOGIC_OR_REVERSE:
new_color = new_color | ~old_color; new_color = new_color | (~old_color & 0x00FFFFFF);
break; break;


case GE_LOGIC_COPY_INVERTED: case GE_LOGIC_COPY_INVERTED:
new_color = ~new_color; new_color = (~new_color & 0x00FFFFFF) | (new_color & 0xFF000000);
break; break;


case GE_LOGIC_OR_INVERTED: case GE_LOGIC_OR_INVERTED:
new_color = ~new_color | old_color; new_color = ((~new_color | old_color) & 0x00FFFFFF) | (new_color & 0xFF000000);
break; break;


case GE_LOGIC_NAND: case GE_LOGIC_NAND:
new_color = ~(new_color & old_color); new_color = (~(new_color & old_color) & 0x00FFFFFF) | (new_color & 0xFF000000);
break; break;


case GE_LOGIC_SET: case GE_LOGIC_SET:
new_color = 0xFFFFFFFF; new_color |= 0x00FFFFFF;
break; break;
} }


Expand All @@ -623,15 +622,25 @@ static inline Vec4<int> GetTextureFunctionOutput(const Vec4<int>& prim_color, co
// We can be accurate up to 24 bit integers, should be enough. // We can be accurate up to 24 bit integers, should be enough.
const __m128 p = _mm_cvtepi32_ps(prim_color.ivec); const __m128 p = _mm_cvtepi32_ps(prim_color.ivec);
const __m128 t = _mm_cvtepi32_ps(texcolor.ivec); const __m128 t = _mm_cvtepi32_ps(texcolor.ivec);
out_rgb.ivec = _mm_cvtps_epi32(_mm_div_ps(_mm_mul_ps(p, t), _mm_set_ps1(255.0f))); const __m128 b = _mm_mul_ps(p, t);
if (gstate.isColorDoublingEnabled()) {
// We double right here, only for modulate. Other tex funcs do not color double.
out_rgb.ivec = _mm_cvtps_epi32(_mm_mul_ps(b, _mm_set_ps1(2.0f / 255.0f)));
} else {
out_rgb.ivec = _mm_cvtps_epi32(_mm_mul_ps(b, _mm_set_ps1(1.0f / 255.0f)));
}


if (rgba) { if (rgba) {
return Vec4<int>(out_rgb.ivec); return Vec4<int>(out_rgb.ivec);
} else { } else {
out_a = prim_color.a(); out_a = prim_color.a();
} }
#else #else
out_rgb = prim_color.rgb() * texcolor.rgb() / 255; if (gstate.isColorDoublingEnabled()) {
out_rgb = (prim_color.rgb() * texcolor.rgb() * 2) / 255;
} else {
out_rgb = prim_color.rgb() * texcolor.rgb() / 255;
}
out_a = (rgba) ? (prim_color.a() * texcolor.a() / 255) : prim_color.a(); out_a = (rgba) ? (prim_color.a() * texcolor.a() / 255) : prim_color.a();
#endif #endif
break; break;
Expand Down Expand Up @@ -895,41 +904,47 @@ static inline Vec3<int> AlphaBlendingResult(const Vec4<int> &source, const Vec4<


template <bool clearMode> template <bool clearMode>
inline void DrawSinglePixel(const DrawingCoords &p, u16 z, u8 fog, const Vec4<int> &color_in) { inline void DrawSinglePixel(const DrawingCoords &p, u16 z, u8 fog, const Vec4<int> &color_in) {
Vec4<int> prim_color = color_in; Vec4<int> prim_color = color_in.Clamp(0, 255);
// Depth range test // Depth range test - applied in clear mode, if not through mode.
// TODO: Clear mode?
if (!gstate.isModeThrough()) if (!gstate.isModeThrough())
if (z < gstate.getDepthRangeMin() || z > gstate.getDepthRangeMax()) if (z < gstate.getDepthRangeMin() || z > gstate.getDepthRangeMax())
return; return;


if (gstate.isColorTestEnabled() && !clearMode)
if (!ColorTestPassed(prim_color.rgb()))
return;

// TODO: Does a need to be clamped?
if (gstate.isAlphaTestEnabled() && !clearMode) if (gstate.isAlphaTestEnabled() && !clearMode)
if (!AlphaTestPassed(prim_color.a())) if (!AlphaTestPassed(prim_color.a()))
return; return;


// Fog is applied prior to color test.
if (gstate.isFogEnabled() && !gstate.isModeThrough() && !clearMode) {
Vec3<int> fogColor = Vec3<int>::FromRGB(gstate.fogcolor);
fogColor = (prim_color.rgb() * (int)fog + fogColor * (255 - (int)fog)) / 255;
prim_color.r() = fogColor.r();
prim_color.g() = fogColor.g();
prim_color.b() = fogColor.b();
}

if (gstate.isColorTestEnabled() && !clearMode)
if (!ColorTestPassed(prim_color.rgb()))
return;

// In clear mode, it uses the alpha color as stencil. // In clear mode, it uses the alpha color as stencil.
u8 stencil = clearMode ? prim_color.a() : GetPixelStencil(p.x, p.y); u8 stencil = clearMode ? prim_color.a() : GetPixelStencil(p.x, p.y);
// TODO: Is it safe to ignore gstate.isDepthTestEnabled() when clear mode is enabled? Probably yes
if (!clearMode && (gstate.isStencilTestEnabled() || gstate.isDepthTestEnabled())) { if (!clearMode && (gstate.isStencilTestEnabled() || gstate.isDepthTestEnabled())) {
if (gstate.isStencilTestEnabled() && !StencilTestPassed(stencil)) { if (gstate.isStencilTestEnabled() && !StencilTestPassed(stencil)) {
stencil = ApplyStencilOp(gstate.getStencilOpSFail(), p.x, p.y); stencil = ApplyStencilOp(gstate.getStencilOpSFail(), stencil);
SetPixelStencil(p.x, p.y, stencil); SetPixelStencil(p.x, p.y, stencil);
return; return;
} }


// Also apply depth at the same time. If disabled, same as passing. // Also apply depth at the same time. If disabled, same as passing.
if (gstate.isDepthTestEnabled() && !DepthTestPassed(p.x, p.y, z)) { if (gstate.isDepthTestEnabled() && !DepthTestPassed(p.x, p.y, z)) {
if (gstate.isStencilTestEnabled()) { if (gstate.isStencilTestEnabled()) {
stencil = ApplyStencilOp(gstate.getStencilOpZFail(), p.x, p.y); stencil = ApplyStencilOp(gstate.getStencilOpZFail(), stencil);
SetPixelStencil(p.x, p.y, stencil); SetPixelStencil(p.x, p.y, stencil);
} }
return; return;
} else if (gstate.isStencilTestEnabled()) { } else if (gstate.isStencilTestEnabled()) {
stencil = ApplyStencilOp(gstate.getStencilOpZPass(), p.x, p.y); stencil = ApplyStencilOp(gstate.getStencilOpZPass(), stencil);
} }


if (gstate.isDepthTestEnabled() && gstate.isDepthWriteEnabled()) { if (gstate.isDepthTestEnabled() && gstate.isDepthWriteEnabled()) {
Expand All @@ -939,28 +954,12 @@ inline void DrawSinglePixel(const DrawingCoords &p, u16 z, u8 fog, const Vec4<in
SetPixelDepth(p.x, p.y, z); SetPixelDepth(p.x, p.y, z);
} }


// Doubling happens only when texturing is enabled, and after tests.
if (gstate.isTextureMapEnabled() && gstate.isColorDoublingEnabled() && !clearMode) {
// TODO: Does this need to be clamped before blending?
prim_color.r() <<= 1;
prim_color.g() <<= 1;
prim_color.b() <<= 1;
}

if (gstate.isFogEnabled() && !gstate.isModeThrough() && !clearMode) {
Vec3<int> fogColor = Vec3<int>::FromRGB(gstate.fogcolor);
fogColor = (prim_color.rgb() * (int)fog + fogColor * (255 - (int)fog)) / 255;
prim_color.r() = fogColor.r();
prim_color.g() = fogColor.g();
prim_color.b() = fogColor.b();
}

const u32 old_color = GetPixelColor(p.x, p.y); const u32 old_color = GetPixelColor(p.x, p.y);
u32 new_color; u32 new_color;


if (gstate.isAlphaBlendEnabled() && !clearMode) { if (gstate.isAlphaBlendEnabled() && !clearMode) {
const Vec4<int> dst = Vec4<int>::FromRGBA(old_color); const Vec4<int> dst = Vec4<int>::FromRGBA(old_color);
// ToRGBA() always automatically clamps. // ToRGB() always automatically clamps.
new_color = AlphaBlendingResult(prim_color, dst).ToRGB(); new_color = AlphaBlendingResult(prim_color, dst).ToRGB();
new_color |= stencil << 24; new_color |= stencil << 24;
} else { } else {
Expand All @@ -974,8 +973,8 @@ inline void DrawSinglePixel(const DrawingCoords &p, u16 z, u8 fog, const Vec4<in


// Logic ops are applied after blending (if blending is enabled.) // Logic ops are applied after blending (if blending is enabled.)
if (gstate.isLogicOpEnabled() && !clearMode) { if (gstate.isLogicOpEnabled() && !clearMode) {
// Logic ops don't affect stencil. // Logic ops don't affect stencil, which happens inside ApplyLogicOp.
new_color = (stencil << 24) | (ApplyLogicOp(gstate.getLogicOp(), old_color, new_color) & 0x00FFFFFF); new_color = ApplyLogicOp(gstate.getLogicOp(), old_color, new_color);
} }


if (clearMode) { if (clearMode) {
Expand Down

0 comments on commit e2a9f6a

Please sign in to comment.