Skip to content

Commit

Permalink
Manually revert the rest of the lmode optimization
Browse files Browse the repository at this point in the history
  • Loading branch information
hrydgard committed May 9, 2023
1 parent 0babac5 commit b42670c
Show file tree
Hide file tree
Showing 7 changed files with 78 additions and 36 deletions.
37 changes: 25 additions & 12 deletions GPU/Common/FragmentShaderGenerator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,7 @@ bool GenerateFragmentShader(const FShaderID &id, char *buffer, const ShaderLangu

p.ApplySamplerMetadata(arrayTexture ? samplersStereo : samplersMono);

bool lmode = id.Bit(FS_BIT_LMODE);
bool doTexture = id.Bit(FS_BIT_DO_TEXTURE);
bool enableFog = id.Bit(FS_BIT_ENABLE_FOG);
bool enableAlphaTest = id.Bit(FS_BIT_ALPHA_TEST);
Expand Down Expand Up @@ -205,7 +206,9 @@ bool GenerateFragmentShader(const FShaderID &id, char *buffer, const ShaderLangu

// Note: the precision qualifiers must match the vertex shader!
WRITE(p, "layout (location = 1) %s in lowp vec4 v_color0;\n", shading);
WRITE(p, "layout (location = 2) %s in lowp vec3 v_color1;\n", shading);
if (lmode) {
WRITE(p, "layout (location = 2) %s in lowp vec3 v_color1;\n", shading);
}
WRITE(p, "layout (location = 3) in highp float v_fogdepth;\n");
if (doTexture) {
WRITE(p, "layout (location = 0) in highp vec3 v_texcoord;\n");
Expand Down Expand Up @@ -311,7 +314,9 @@ bool GenerateFragmentShader(const FShaderID &id, char *buffer, const ShaderLangu
}
const char *colorInterpolation = doFlatShading && compat.shaderLanguage == HLSL_D3D11 ? "nointerpolation " : "";
WRITE(p, " %svec4 v_color0: COLOR0;\n", colorInterpolation);
WRITE(p, " vec3 v_color1: COLOR1;\n");
if (lmode) {
WRITE(p, " vec3 v_color1: COLOR1;\n");
}
WRITE(p, " float v_fogdepth: TEXCOORD1;\n");
if (needFragCoord) {
if (compat.shaderLanguage == HLSL_D3D11) {
Expand Down Expand Up @@ -425,7 +430,9 @@ bool GenerateFragmentShader(const FShaderID &id, char *buffer, const ShaderLangu
}

WRITE(p, "%s %s lowp vec4 v_color0;\n", shading, compat.varying_fs);
WRITE(p, "%s %s lowp vec3 v_color1;\n", shading, compat.varying_fs);
if (lmode) {
WRITE(p, "%s %s lowp vec3 v_color1;\n", shading, compat.varying_fs);
}
if (enableFog) {
*uniformMask |= DIRTY_FOGCOLOR;
WRITE(p, "uniform vec3 u_fogcolor;\n");
Expand Down Expand Up @@ -535,7 +542,9 @@ bool GenerateFragmentShader(const FShaderID &id, char *buffer, const ShaderLangu

if (compat.shaderLanguage == HLSL_D3D11 || compat.shaderLanguage == HLSL_D3D9) {
WRITE(p, " vec4 v_color0 = In.v_color0;\n");
WRITE(p, " vec3 v_color1 = In.v_color1;\n");
if (lmode) {
WRITE(p, " vec3 v_color1 = In.v_color1;\n");
}
if (enableFog) {
WRITE(p, " float v_fogdepth = In.v_fogdepth;\n");
}
Expand Down Expand Up @@ -573,8 +582,12 @@ bool GenerateFragmentShader(const FShaderID &id, char *buffer, const ShaderLangu
// Clear mode does not allow any fancy shading.
WRITE(p, " vec4 v = v_color0;\n");
} else {
const char *secondary = "";
// Secondary color for specular on top of texture
WRITE(p, " vec4 s = vec4(v_color1, 0.0);\n");
if (lmode) {
WRITE(p, " vec4 s = vec4(v_color1, 0.0);\n");
secondary = " + s";
}

if (doTexture) {
char texcoord[64] = "v_texcoord";
Expand Down Expand Up @@ -831,28 +844,28 @@ bool GenerateFragmentShader(const FShaderID &id, char *buffer, const ShaderLangu

switch (texFunc) {
case GE_TEXFUNC_MODULATE:
WRITE(p, " vec4 v = p * t + s;\n");
WRITE(p, " vec4 v = p * t%s;\n", secondary);
break;
case GE_TEXFUNC_DECAL:
WRITE(p, " vec4 v = vec4(mix(p.rgb, t.rgb, t.a), p.a) + s;\n");
WRITE(p, " vec4 v = vec4(mix(p.rgb, t.rgb, t.a), p.a)%s;\n", secondary);
break;
case GE_TEXFUNC_BLEND:
WRITE(p, " vec4 v = vec4(mix(p.rgb, u_texenv.rgb, t.rgb), p.a * t.a) + s;\n");
WRITE(p, " vec4 v = vec4(mix(p.rgb, u_texenv.rgb, t.rgb), p.a * t.a)%s;\n", secondary);
break;
case GE_TEXFUNC_REPLACE:
WRITE(p, " vec4 r = t;\n");
WRITE(p, " r.a = mix(r.a, p.a, u_texNoAlpha);\n");
WRITE(p, " vec4 v = r + s;\n");
WRITE(p, " vec4 v = r%s;\n", secondary);
break;
case GE_TEXFUNC_ADD:
case GE_TEXFUNC_UNKNOWN1:
case GE_TEXFUNC_UNKNOWN2:
case GE_TEXFUNC_UNKNOWN3:
WRITE(p, " vec4 v = vec4(p.rgb + t.rgb, p.a * t.a) + s;\n");
WRITE(p, " vec4 v = vec4(p.rgb + t.rgb, p.a * t.a)%s;\n", secondary);
break;
default:
// Doesn't happen
WRITE(p, " vec4 v = p + s;\n"); break;
WRITE(p, " vec4 v = p%s;\n", secondary); break;
break;
}

Expand All @@ -861,7 +874,7 @@ bool GenerateFragmentShader(const FShaderID &id, char *buffer, const ShaderLangu
WRITE(p, " v.rgb = clamp(v.rgb * u_texMul, 0.0, 1.0);\n");
} else {
// No texture mapping
WRITE(p, " vec4 v = v_color0 + s;\n");
WRITE(p, " vec4 v = v_color0%s;\n", secondary);
}

if (enableFog) {
Expand Down
6 changes: 4 additions & 2 deletions GPU/Common/GeometryShaderGenerator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -74,8 +74,10 @@ bool GenerateGeometryShader(const GShaderID &id, char *buffer, const ShaderLangu
}
varyings.push_back(VaryingDef{ "vec4", "v_color0", Draw::SEM_COLOR0, 1, "lowp" });
outVaryings.push_back(VaryingDef{ "vec4", "v_color0Out", Draw::SEM_COLOR0, 1, "lowp" });
varyings.push_back(VaryingDef{ "vec3", "v_color1", Draw::SEM_COLOR1, 2, "lowp" });
outVaryings.push_back(VaryingDef{ "vec3", "v_color1Out", Draw::SEM_COLOR1, 2, "lowp" });
if (id.Bit(GS_BIT_LMODE)) {
varyings.push_back(VaryingDef{ "vec3", "v_color1", Draw::SEM_COLOR1, 2, "lowp" });
outVaryings.push_back(VaryingDef{ "vec3", "v_color1Out", Draw::SEM_COLOR1, 2, "lowp" });
}
varyings.push_back(VaryingDef{ "float", "v_fogdepth", Draw::SEM_TEXCOORD1, 3, "highp" });
outVaryings.push_back(VaryingDef{ "float", "v_fogdepthOut", Draw::SEM_TEXCOORD1, 3, "highp" });

Expand Down
8 changes: 8 additions & 0 deletions GPU/Common/ShaderId.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -189,6 +189,7 @@ std::string FragmentShaderDesc(const FShaderID &id) {
if (id.Bit(FS_BIT_DO_TEXTURE)) desc << (id.Bit(FS_BIT_3D_TEXTURE) ? "Tex3D " : "Tex ");
if (id.Bit(FS_BIT_DO_TEXTURE_PROJ)) desc << "TexProj ";
if (id.Bit(FS_BIT_ENABLE_FOG)) desc << "Fog ";
if (id.Bit(FS_BIT_LMODE)) desc << "LM ";
if (id.Bit(FS_BIT_FLATSHADE)) desc << "Flat ";
if (id.Bit(FS_BIT_BGRA_TEXTURE)) desc << "BGRA ";
switch ((ShaderDepalMode)id.Bits(FS_BIT_SHADER_DEPAL_MODE, 2)) {
Expand Down Expand Up @@ -280,6 +281,7 @@ void ComputeFragmentShaderID(FShaderID *id_out, const ComputedPipelineState &pip
id.SetBit(FS_BIT_CLEARMODE);
} else {
bool isModeThrough = gstate.isModeThrough();
bool lmode = gstate.isUsingSecondaryColor() && gstate.isLightingEnabled() && !isModeThrough;
bool enableFog = gstate.isFogEnabled() && !isModeThrough;
bool enableAlphaTest = gstate.isAlphaTestEnabled() && !IsAlphaTestTriviallyTrue();
bool enableColorTest = gstate.isColorTestEnabled() && !IsColorTestTriviallyTrue();
Expand Down Expand Up @@ -309,6 +311,8 @@ void ComputeFragmentShaderID(FShaderID *id_out, const ComputedPipelineState &pip
id.SetBit(FS_BIT_3D_TEXTURE, gstate_c.curTextureIs3D);
}

id.SetBit(FS_BIT_LMODE, lmode);

if (enableAlphaTest) {
// 5 bits total.
id.SetBit(FS_BIT_ALPHA_TEST);
Expand Down Expand Up @@ -391,6 +395,7 @@ std::string GeometryShaderDesc(const GShaderID &id) {
desc << StringFromFormat("%08x:%08x ", id.d[1], id.d[0]);
if (id.Bit(GS_BIT_ENABLED)) desc << "ENABLED ";
if (id.Bit(GS_BIT_DO_TEXTURE)) desc << "TEX ";
if (id.Bit(GS_BIT_LMODE)) desc << "LM ";
return desc.str();
}

Expand Down Expand Up @@ -422,6 +427,9 @@ void ComputeGeometryShaderID(GShaderID *id_out, const Draw::Bugs &bugs, int prim
if (gstate.isModeClear()) {
// No attribute bits.
} else {
bool lmode = gstate.isUsingSecondaryColor() && gstate.isLightingEnabled() && !isModeThrough;

id.SetBit(GS_BIT_LMODE, lmode);
if (gstate.isTextureMapEnabled()) {
id.SetBit(GS_BIT_DO_TEXTURE);
}
Expand Down
6 changes: 4 additions & 2 deletions GPU/Common/ShaderId.h
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,8 @@ enum FShaderBit : uint8_t {
FS_BIT_SHADER_TEX_CLAMP = 7,
FS_BIT_CLAMP_S = 8,
FS_BIT_CLAMP_T = 9,
// 2 bits free
// 1 bit free
FS_BIT_LMODE = 11,
FS_BIT_ALPHA_TEST = 12,
FS_BIT_ALPHA_TEST_FUNC = 13, // 3 bits
FS_BIT_ALPHA_AGAINST_ZERO = 16,
Expand Down Expand Up @@ -110,7 +111,8 @@ static inline FShaderBit operator +(FShaderBit bit, int i) {
enum GShaderBit : uint8_t {
GS_BIT_ENABLED = 0, // If not set, we don't use a geo shader.
GS_BIT_DO_TEXTURE = 1, // presence of texcoords
GS_BIT_CURVE = 2, // curve, which means don't do range culling.
GS_BIT_LMODE = 2,
GS_BIT_CURVE = 3, // curve, which means don't do range culling.
};

static inline GShaderBit operator +(GShaderBit bit, int i) {
Expand Down
48 changes: 30 additions & 18 deletions GPU/Common/VertexShaderGenerator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -285,7 +285,9 @@ bool GenerateVertexShader(const VShaderID &id, char *buffer, const ShaderLanguag
}

WRITE(p, "layout (location = 1) %sout lowp vec4 v_color0;\n", shading);
WRITE(p, "layout (location = 2) %sout lowp vec3 v_color1;\n", shading);
if (lmode) {
WRITE(p, "layout (location = 2) %sout lowp vec3 v_color1;\n", shading);
}

WRITE(p, "layout (location = 0) out highp vec3 v_texcoord;\n");

Expand Down Expand Up @@ -416,7 +418,9 @@ bool GenerateVertexShader(const VShaderID &id, char *buffer, const ShaderLanguag
WRITE(p, " vec3 v_texcoord : TEXCOORD0;\n");
const char *colorInterpolation = doFlatShading && compat.shaderLanguage == HLSL_D3D11 ? "nointerpolation " : "";
WRITE(p, " %svec4 v_color0 : COLOR0;\n", colorInterpolation);
WRITE(p, " vec3 v_color1 : COLOR1;\n");
if (lmode) {
WRITE(p, " vec3 v_color1 : COLOR1;\n");
}

WRITE(p, " float v_fogdepth : TEXCOORD1;\n");
if (compat.shaderLanguage == HLSL_D3D9) {
Expand Down Expand Up @@ -587,7 +591,9 @@ bool GenerateVertexShader(const VShaderID &id, char *buffer, const ShaderLanguag
}

WRITE(p, "%s%s lowp vec4 v_color0;\n", shading, compat.varying_vs);
WRITE(p, "%s%s lowp vec3 v_color1;\n", shading, compat.varying_vs);
if (lmode) {
WRITE(p, "%s%s lowp vec3 v_color1;\n", shading, compat.varying_vs);
}

WRITE(p, "%s %s vec3 v_texcoord;\n", compat.varying_vs, highpTexcoord ? "highp" : "mediump");

Expand Down Expand Up @@ -819,13 +825,14 @@ bool GenerateVertexShader(const VShaderID &id, char *buffer, const ShaderLanguag
}
if (hasColor) {
WRITE(p, " %sv_color0 = color0;\n", compat.vsOutPrefix);
if (lmode)
if (lmode) {
WRITE(p, " %sv_color1 = color1;\n", compat.vsOutPrefix);
else
WRITE(p, " %sv_color1 = splat3(0.0);\n", compat.vsOutPrefix);
}
} else {
WRITE(p, " %sv_color0 = u_matambientalpha;\n", compat.vsOutPrefix);
WRITE(p, " %sv_color1 = splat3(0.0);\n", compat.vsOutPrefix);
if (lmode) {
WRITE(p, " %sv_color1 = splat3(0.0);\n", compat.vsOutPrefix);
}
}
WRITE(p, " %sv_fogdepth = fog;\n", compat.vsOutPrefix);
if (isModeThrough) {
Expand Down Expand Up @@ -970,6 +977,7 @@ bool GenerateVertexShader(const VShaderID &id, char *buffer, const ShaderLanguag
bool distanceNeeded = false;
bool anySpots = false;
if (enableLighting) {

p.C(" lowp vec4 lightSum0 = u_ambient * ambientColor + vec4(u_matemissive, 0.0);\n");

for (int i = 0; i < 4; i++) {
Expand Down Expand Up @@ -1153,18 +1161,20 @@ bool GenerateVertexShader(const VShaderID &id, char *buffer, const ShaderLanguag
}

if (enableLighting) {
WRITE(p, " lightSum0 = clamp(lightSum0, 0.0, 1.0);\n");
if (specularIsZero) {
WRITE(p, " %sv_color0 = lightSum0;\n", compat.vsOutPrefix);
WRITE(p, " %sv_color1 = splat3(0.0);\n", compat.vsOutPrefix);
} else {
if (lmode) {
WRITE(p, " %sv_color0 = lightSum0;\n", compat.vsOutPrefix);
// v_color1 only exists when lmode = 1.
// Sum up ambient, emissive here.
if (lmode) {
WRITE(p, " %sv_color0 = clamp(lightSum0, 0.0, 1.0);\n", compat.vsOutPrefix);
// v_color1 only exists when lmode = 1.
if (specularIsZero) {
WRITE(p, " %sv_color1 = splat3(0.0);\n", compat.vsOutPrefix);
} else {
WRITE(p, " %sv_color1 = clamp(lightSum1, 0.0, 1.0);\n", compat.vsOutPrefix);
}
} else {
if (specularIsZero) {
WRITE(p, " %sv_color0 = clamp(lightSum0, 0.0, 1.0);\n", compat.vsOutPrefix);
} else {
WRITE(p, " %sv_color0 = clamp(lightSum0 + vec4(lightSum1, 0.0), 0.0, 1.0);\n", compat.vsOutPrefix);
WRITE(p, " %sv_color1 = splat3(0.0);\n", compat.vsOutPrefix);
WRITE(p, " %sv_color0 = clamp(clamp(lightSum0, 0.0, 1.0) + vec4(lightSum1, 0.0), 0.0, 1.0);\n", compat.vsOutPrefix);
}
}
} else {
Expand All @@ -1180,7 +1190,9 @@ bool GenerateVertexShader(const VShaderID &id, char *buffer, const ShaderLanguag
WRITE(p, " %sv_color0.r += 0.000001;\n", compat.vsOutPrefix);
}
}
WRITE(p, " %sv_color1 = splat3(0.0);\n", compat.vsOutPrefix);
if (lmode) {
WRITE(p, " %sv_color1 = splat3(0.0);\n", compat.vsOutPrefix);
}
}

bool scaleUV = !isModeThrough && (uvGenMode == GE_TEXMAP_TEXTURE_COORDS || uvGenMode == GE_TEXMAP_UNKNOWN);
Expand Down
3 changes: 2 additions & 1 deletion GPU/GLES/ShaderManagerGLES.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -869,6 +869,7 @@ LinkedShader *ShaderManagerGLES::ApplyFragmentShader(VShaderID VSID, Shader *vs,
shaderSwitchDirtyUniforms_ = 0;

if (ls == nullptr) {
_dbg_assert_(FSID.Bit(FS_BIT_LMODE) == VSID.Bit(VS_BIT_LMODE));
_dbg_assert_(FSID.Bit(FS_BIT_FLATSHADE) == VSID.Bit(VS_BIT_FLATSHADE));

if (vs == nullptr || fs == nullptr) {
Expand Down Expand Up @@ -965,7 +966,7 @@ enum class CacheDetectFlags {
};

#define CACHE_HEADER_MAGIC 0x83277592
#define CACHE_VERSION 29
#define CACHE_VERSION 30

struct CacheHeader {
uint32_t magic;
Expand Down
6 changes: 5 additions & 1 deletion GPU/Vulkan/ShaderManagerVulkan.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -326,6 +326,10 @@ void ShaderManagerVulkan::GetShaders(int prim, VertexDecoder *decoder, VulkanVer
}

_dbg_assert_(FSID.Bit(FS_BIT_FLATSHADE) == VSID.Bit(VS_BIT_FLATSHADE));
_dbg_assert_(FSID.Bit(FS_BIT_LMODE) == VSID.Bit(VS_BIT_LMODE));
if (GSID.Bit(GS_BIT_ENABLED)) {
_dbg_assert_(GSID.Bit(GS_BIT_LMODE) == VSID.Bit(VS_BIT_LMODE));
}

// Just update uniforms if this is the same shader as last time.
if (lastVShader_ != nullptr && lastFShader_ != nullptr && VSID == lastVSID_ && FSID == lastFSID_ && GSID == lastGSID_) {
Expand Down Expand Up @@ -516,7 +520,7 @@ enum class VulkanCacheDetectFlags {
};

#define CACHE_HEADER_MAGIC 0xff51f420
#define CACHE_VERSION 43
#define CACHE_VERSION 44

struct VulkanCacheHeader {
uint32_t magic;
Expand Down

0 comments on commit b42670c

Please sign in to comment.