Skip to content

Commit

Permalink
Merge pull request #6932 from unknownbrackets/d3d9
Browse files: browse the repository at this point in the history
d3d9: Try to reduce distance between vertex shaders
  • Loading branch information
hrydgard committed Sep 21, 2014
2 parents dda3ca3 + a62f88d commit 11a87b4
Show file tree
Hide file tree
Showing 3 changed files with 128 additions and 74 deletions.
105 changes: 78 additions & 27 deletions GPU/Directx9/PixelShaderGeneratorDX9.cpp
Expand Up @@ -17,7 +17,9 @@

#include <cstdio>

#include "PixelShaderGeneratorDX9.h"
#include "Core/Reporting.h"
#include "Core/Config.h"
#include "GPU/Directx9/PixelShaderGeneratorDX9.h"
#include "GPU/ge_constants.h"
#include "GPU/GPUState.h"

Expand Down Expand Up @@ -150,14 +152,16 @@ static bool CanDoubleSrcBlendMode() {
// Here we must take all the bits of the gstate that determine what the fragment shader will
// look like, and concatenate them together into an ID.
void ComputeFragmentShaderIDDX9(FragmentShaderIDDX9 *id) {
memset(&id->d[0], 0, sizeof(id->d));
int id0 = 0;
int id1 = 0;
if (gstate.isModeClear()) {
// We only need one clear shader, so let's ignore the rest of the bits.
id->d[0] = 1;
id0 = 1;
} else {
bool lmode = gstate.isUsingSecondaryColor() && gstate.isLightingEnabled();
bool enableFog = gstate.isFogEnabled() && !gstate.isModeThrough();
bool enableAlphaTest = gstate.isAlphaTestEnabled() && !IsAlphaTestTriviallyTrue();
bool enableAlphaTest = gstate.isAlphaTestEnabled() && !IsAlphaTestTriviallyTrue() && !g_Config.bDisableAlphaTest;
bool alphaTestAgainstZero = IsAlphaTestAgainstZero();
bool enableColorTest = gstate.isColorTestEnabled() && !IsColorTestTriviallyTrue();
bool enableColorDoubling = gstate.isColorDoublingEnabled();
// This isn't really correct, but it's a hack to get doubled blend modes to work more correctly.
Expand All @@ -169,31 +173,51 @@ void ComputeFragmentShaderIDDX9(FragmentShaderIDDX9 *id) {
if (gstate_c.textureFullAlpha && gstate.getTextureFunction() != GE_TEXFUNC_REPLACE)
doTextureAlpha = false;

// id->d[0] |= (gstate.isModeClear() & 1);
// id0 |= (gstate.isModeClear() & 1);
if (gstate.isTextureMapEnabled()) {
id->d[0] |= 1 << 1;
id->d[0] |= gstate.getTextureFunction() << 2;
id->d[0] |= (doTextureAlpha & 1) << 5; // rgb or rgba
id0 |= 1 << 1;
id0 |= gstate.getTextureFunction() << 2;
id0 |= (doTextureAlpha & 1) << 5; // rgb or rgba
id0 |= (gstate_c.flipTexture & 1) << 6;

if (gstate_c.needShaderTexClamp) {
bool textureAtOffset = gstate_c.curTextureXOffset != 0 || gstate_c.curTextureYOffset != 0;
// 3 bits total.
id0 |= 1 << 7;
id0 |= gstate.isTexCoordClampedS() << 8;
id0 |= gstate.isTexCoordClampedT() << 9;
id0 |= (textureAtOffset & 1) << 10;
}
}

id->d[0] |= (lmode & 1) << 7;
id->d[0] |= enableAlphaTest << 8;
if (enableAlphaTest)
id->d[0] |= gstate.getAlphaTestFunction() << 9;
id->d[0] |= enableColorTest << 12;
if (enableColorTest)
id->d[0] |= gstate.getColorTestFunction() << 13; // color test func
id->d[0] |= (enableFog & 1) << 15;
id->d[0] |= (doTextureProjection & 1) << 16;
id->d[0] |= (enableColorDoubling & 1) << 17;
id->d[0] |= (enableAlphaDoubling & 1) << 18;
id->d[0] |= (gstate_c.bgraTexture & 1) << 19;
id0 |= (lmode & 1) << 11;
#if !defined(DX9_USE_HW_ALPHA_TEST)
if (enableAlphaTest) {
// 4 bits total.
id0 |= 1 << 12;
id0 |= gstate.getAlphaTestFunction() << 13;
}
#endif
if (enableColorTest) {
// 3 bits total.
id0 |= 1 << 16;
id0 |= gstate.getColorTestFunction() << 17;
}
id0 |= (enableFog & 1) << 19;
id0 |= (doTextureProjection & 1) << 20;
id0 |= (enableColorDoubling & 1) << 21;
id0 |= (enableAlphaDoubling & 1) << 22;
id0 |= (gstate_c.bgraTexture & 1) << 23;

id0 |= (alphaTestAgainstZero & 1) << 28;
if (enableAlphaTest)
gpuStats.numAlphaTestedDraws++;
else
gpuStats.numNonAlphaTestedDraws++;
}

id->d[0] = id0;
id->d[1] = id1;
}

// Missing: Z depth range
Expand All @@ -204,7 +228,8 @@ void GenerateFragmentShaderDX9(char *buffer) {
bool lmode = gstate.isUsingSecondaryColor() && gstate.isLightingEnabled();
bool doTexture = gstate.isTextureMapEnabled() && !gstate.isModeClear();
bool enableFog = gstate.isFogEnabled() && !gstate.isModeThrough() && !gstate.isModeClear();
bool enableAlphaTest = gstate.isAlphaTestEnabled() && !IsAlphaTestTriviallyTrue() && !gstate.isModeClear();
bool enableAlphaTest = gstate.isAlphaTestEnabled() && !IsAlphaTestTriviallyTrue() && !gstate.isModeClear() && !g_Config.bDisableAlphaTest;
bool alphaTestAgainstZero = IsAlphaTestAgainstZero();
bool enableColorTest = gstate.isColorTestEnabled() && !IsColorTestTriviallyTrue() && !gstate.isModeClear();
bool enableColorDoubling = gstate.isColorDoublingEnabled();
// This isn't really correct, but it's a hack to get doubled blend modes to work more correctly.
Expand Down Expand Up @@ -269,10 +294,17 @@ void GenerateFragmentShaderDX9(char *buffer) {
}

if (gstate.isTextureMapEnabled()) {
const char *texcoord = "In.v_texcoord";
if (doTextureProjection && gstate_c.flipTexture) {
// Since we need to flip v, we project manually.
WRITE(p, " float2 fixedcoord = float2(v_texcoord.x / v_texcoord.z, 1.0 - (v_texcoord.y / v_texcoord.z));\n");
texcoord = "fixedcoord";
doTextureProjection = false;
}
if (doTextureProjection) {
WRITE(p, " float4 t = tex2Dproj(tex, float4(In.v_texcoord.x, In.v_texcoord.y, 0, In.v_texcoord.z))%s;\n", gstate_c.bgraTexture ? ".bgra" : "");
} else {
WRITE(p, " float4 t = tex2D(tex, In.v_texcoord.xy)%s;\n", gstate_c.bgraTexture ? ".bgra" : "");
WRITE(p, " float4 t = tex2D(tex, %s.xy)%s;\n", texcoord, gstate_c.bgraTexture ? ".bgra" : "");
}
WRITE(p, " float4 p = In.v_color0;\n");

Expand Down Expand Up @@ -315,11 +347,30 @@ void GenerateFragmentShaderDX9(char *buffer) {

#if !defined(DX9_USE_HW_ALPHA_TEST)
if (enableAlphaTest) {
GEComparison alphaTestFunc = gstate.getAlphaTestFunction();
const char *alphaTestFuncs[] = { "#", "#", " != ", " == ", " >= ", " > ", " <= ", " < " }; // never/always don't make sense
if (alphaTestFuncs[alphaTestFunc][0] != '#') {
// TODO: Rewrite this to use clip() appropriately (like, clip(v.a - u_alphacolorref.a))
WRITE(p, " if (roundAndScaleTo255f(v.a) %s u_alphacolorref.a) clip(-1);\n", alphaTestFuncs[alphaTestFunc]);
if (alphaTestAgainstZero) {
GEComparison alphaTestFunc = gstate.getAlphaTestFunction();
// When testing against 0 (extremely common), we can avoid some math.
// 0.002 is approximately half of 1.0 / 255.0.
if (alphaTestFunc == GE_COMP_NOTEQUAL || alphaTestFunc == GE_COMP_GREATER) {
WRITE(p, " clip(v.a - 0.002);\n");
} else if (alphaTestFunc != GE_COMP_NEVER) {
// Anything else is a test for == 0. Happens sometimes, actually...
WRITE(p, " clip(-v.a + 0.002);\n");
} else {
// NEVER has been logged as used by games, although it makes little sense - statically failing.
// Maybe we could discard the drawcall, but it's pretty rare. Let's just statically discard here.
WRITE(p, " clip(-1);\n");
}
} else {
GEComparison alphaTestFunc = gstate.getAlphaTestFunction();
const char *alphaTestFuncs[] = { "#", "#", " != ", " == ", " >= ", " > ", " <= ", " < " }; // never/always don't make sense
if (alphaTestFuncs[alphaTestFunc][0] != '#') {
// TODO: Rewrite this to use clip() appropriately (like, clip(v.a - u_alphacolorref.a))
WRITE(p, " if (roundAndScaleTo255f(v.a) %s u_alphacolorref.a) clip(-1);\n", alphaTestFuncs[alphaTestFunc]);
} else {
// This means NEVER. See above.
WRITE(p, " clip(-1);\n");
}
}
}
#endif
Expand Down
93 changes: 48 additions & 45 deletions GPU/Directx9/VertexShaderGeneratorDX9.cpp
Expand Up @@ -28,6 +28,7 @@
#include "Core/Config.h"

#include "GPU/Directx9/VertexShaderGeneratorDX9.h"
#include "GPU/Common/VertexDecoderCommon.h"

#undef WRITE

Expand All @@ -41,16 +42,9 @@ bool CanUseHardwareTransformDX9(int prim) {
return !gstate.isModeThrough() && prim != GE_PRIM_RECTANGLES;
}

// Normalizes a bone weight count: zero stays zero, any other count below four
// becomes four, and counts of four or more are returned unchanged.
int TranslateNumBonesDX9(int bones) {
	if (bones == 0) {
		return 0;
	}
	// Also rounding counts below 8 up to 8 gave drawing problems in FF:CC,
	// so four is the only tier that gets rounded up to.
	return bones < 4 ? 4 : bones;
}

// prim so we can special case for RECTANGLES :(
void ComputeVertexShaderIDDX9(VertexShaderIDDX9 *id, u32 vertType, int prim, bool useHWTransform) {
int doTexture = gstate.isTextureMapEnabled() && !gstate.isModeClear();
bool doTexture = gstate.isTextureMapEnabled() && !gstate.isModeClear();
bool doTextureProjection = gstate.getUVGenMode() == GE_TEXMAP_TEXTURE_MATRIX;
bool doShadeMapping = gstate.getUVGenMode() == GE_TEXMAP_ENVIRONMENT_MAP;

Expand All @@ -60,54 +54,61 @@ void ComputeVertexShaderIDDX9(VertexShaderIDDX9 *id, u32 vertType, int prim, boo
bool enableFog = gstate.isFogEnabled() && !gstate.isModeThrough() && !gstate.isModeClear();
bool lmode = gstate.isUsingSecondaryColor() && gstate.isLightingEnabled();

memset(id->d, 0, sizeof(id->d));
id->d[0] = lmode & 1;
id->d[0] |= ((int)gstate.isModeThrough()) << 1;
id->d[0] |= ((int)enableFog) << 2;
id->d[0] |= doTexture << 3;
id->d[0] |= (hasColor & 1) << 4;
int id0 = 0;
int id1 = 0;

id0 = lmode & 1;
id0 |= (gstate.isModeThrough() & 1) << 1;
id0 |= (enableFog & 1) << 2;
id0 |= (hasColor & 1) << 3;
if (doTexture) {
id->d[0] |= (gstate_c.flipTexture & 1) << 5;
id->d[0] |= (doTextureProjection & 1) << 6;
id0 |= 1 << 4;
id0 |= (gstate_c.flipTexture & 1) << 5;
id0 |= (doTextureProjection & 1) << 6;
}

if (useHWTransform) {
id->d[0] |= 1 << 8;
id->d[0] |= (hasNormal & 1) << 9;
id0 |= 1 << 8;
id0 |= (hasNormal & 1) << 9;

// UV generation mode
id->d[0] |= gstate.getUVGenMode() << 16;
id0 |= gstate.getUVGenMode() << 16;

// The next bits are used differently depending on UVgen mode
if (doTextureProjection) {
id->d[0] |= gstate.getUVProjMode() << 18;
id0 |= gstate.getUVProjMode() << 18;
} else if (doShadeMapping) {
id->d[0] |= gstate.getUVLS0() << 18;
id->d[0] |= gstate.getUVLS1() << 20;
id0 |= gstate.getUVLS0() << 18;
id0 |= gstate.getUVLS1() << 20;
}

// Bones
if (vertTypeIsSkinningEnabled(vertType))
id->d[0] |= (TranslateNumBonesDX9(vertTypeGetNumBoneWeights(vertType)) - 1) << 22;
id0 |= (TranslateNumBones(vertTypeGetNumBoneWeights(vertType)) - 1) << 22;

// Okay, d[1] coming up. ==============

if (gstate.isLightingEnabled() || doShadeMapping) {
// Light bits
for (int i = 0; i < 4; i++) {
id->d[1] |= gstate.getLightComputation(i) << (i * 4);
id->d[1] |= gstate.getLightType(i) << (i * 4 + 2);
id1 |= gstate.getLightComputation(i) << (i * 4);
id1 |= gstate.getLightType(i) << (i * 4 + 2);
}
id->d[1] |= (gstate.materialupdate & 7) << 16;
id1 |= (gstate.materialupdate & 7) << 16;
for (int i = 0; i < 4; i++) {
id->d[1] |= (gstate.isLightChanEnabled(i) & 1) << (20 + i);
id1 |= (gstate.isLightChanEnabled(i) & 1) << (20 + i);
}
// doShadeMapping is stored as UVGenMode, so this is enough for isLightingEnabled.
id1 |= 1 << 24;
}
id->d[1] |= gstate.isLightingEnabled() << 24;
id->d[1] |= (vertTypeGetWeightMask(vertType) >> GE_VTYPE_WEIGHT_SHIFT) << 25;
id->d[1] |= gstate.areNormalsReversed() << 26;
id->d[1] |= (hasTexcoord & 1) << 27;
// 2 bits.
id1 |= (vertTypeGetWeightMask(vertType) >> GE_VTYPE_WEIGHT_SHIFT) << 25;
id1 |= (gstate.areNormalsReversed() & 1) << 27;
id1 |= (hasTexcoord & 1) << 28;
}

id->d[0] = id0;
id->d[1] = id1;
}

static const char * const boneWeightAttrDecl[9] = {
Expand All @@ -132,8 +133,8 @@ void GenerateVertexShaderDX9(int prim, char *buffer, bool useHWTransform) {
char *p = buffer;
const u32 vertType = gstate.vertType;

int lmode = gstate.isUsingSecondaryColor() && gstate.isLightingEnabled();
int doTexture = gstate.isTextureMapEnabled() && !gstate.isModeClear();
bool lmode = gstate.isUsingSecondaryColor() && gstate.isLightingEnabled();
bool doTexture = gstate.isTextureMapEnabled() && !gstate.isModeClear();
bool doTextureProjection = gstate.getUVGenMode() == GE_TEXMAP_TEXTURE_MATRIX;
bool doShadeMapping = gstate.getUVGenMode() == GE_TEXMAP_ENVIRONMENT_MAP;

Expand All @@ -144,7 +145,7 @@ void GenerateVertexShaderDX9(int prim, char *buffer, bool useHWTransform) {
bool throughmode = (vertType & GE_VTYPE_THROUGH_MASK) != 0;
bool flipV = gstate_c.flipTexture;
bool flipNormal = gstate.areNormalsReversed();
bool prescale = g_Config.bPrescaleUV && !throughmode && gstate.getTextureFunction() == 0;
bool prescale = g_Config.bPrescaleUV && !throughmode && (gstate.getUVGenMode() == GE_TEXMAP_TEXTURE_COORDS || gstate.getUVGenMode() == GE_TEXMAP_UNKNOWN);

DoLightComputation doLight[4] = {LIGHT_OFF, LIGHT_OFF, LIGHT_OFF, LIGHT_OFF};
if (useHWTransform) {
Expand Down Expand Up @@ -177,10 +178,10 @@ void GenerateVertexShaderDX9(int prim, char *buffer, bool useHWTransform) {
// When transforming by hardware, we need a great deal more uniforms...
WRITE(p, "float4x3 u_world : register(c%i);\n", CONST_VS_WORLD);
WRITE(p, "float4x3 u_view : register(c%i);\n", CONST_VS_VIEW);
if (gstate.getUVGenMode() == 1)
if (doTextureProjection)
WRITE(p, "float4x3 u_texmtx : register(c%i);\n", CONST_VS_TEXMTX);
if (vertTypeIsSkinningEnabled(vertType)) {
int numBones = TranslateNumBonesDX9(vertTypeGetNumBoneWeights(vertType));
int numBones = TranslateNumBones(vertTypeGetNumBoneWeights(vertType));
#ifdef USE_BONE_ARRAY
WRITE(p, "float4x3 u_bone[%i] : register(c%i);\n", numBones, CONST_VS_BONE0);
#else
Expand All @@ -189,7 +190,7 @@ void GenerateVertexShaderDX9(int prim, char *buffer, bool useHWTransform) {
}
#endif
}
if (doTexture) {
if (doTexture && (flipV || !prescale || gstate.getUVGenMode() == GE_TEXMAP_ENVIRONMENT_MAP || gstate.getUVGenMode() == GE_TEXMAP_TEXTURE_MATRIX)) {
WRITE(p, "float4 u_uvscaleoffset : register(c%i);\n", CONST_VS_UVSCALEOFFSET);
}
for (int i = 0; i < 4; i++) {
Expand Down Expand Up @@ -228,7 +229,7 @@ void GenerateVertexShaderDX9(int prim, char *buffer, bool useHWTransform) {
if (useHWTransform) {
WRITE(p, "struct VS_IN { \n");
if (vertTypeIsSkinningEnabled(vertType)) {
WRITE(p, "%s", boneWeightAttrDecl[TranslateNumBonesDX9(vertTypeGetNumBoneWeights(vertType))]);
WRITE(p, "%s", boneWeightAttrDecl[TranslateNumBones(vertTypeGetNumBoneWeights(vertType))]);
}
if (doTexture && hasTexcoord) {
if (doTextureProjection)
Expand Down Expand Up @@ -306,11 +307,11 @@ void GenerateVertexShaderDX9(int prim, char *buffer, bool useHWTransform) {
// No skinning, just standard T&L.
WRITE(p, " float3 worldpos = mul(float4(In.position.xyz, 1.0), u_world);\n");
if (hasNormal)
WRITE(p, " float3 worldnormal = normalize( mul(float4(In.normal, 0.0), u_world));\n", flipNormal ? "-" : "");
WRITE(p, " float3 worldnormal = normalize( mul(float4(%sIn.normal, 0.0), u_world));\n", flipNormal ? "-" : "");
else
WRITE(p, " float3 worldnormal = float3(0.0, 0.0, 1.0);\n");
} else {
int numWeights = TranslateNumBonesDX9(vertTypeGetNumBoneWeights(vertType));
int numWeights = TranslateNumBones(vertTypeGetNumBoneWeights(vertType));

static const char * const boneWeightAttr[8] = {
"a_w1.x", "a_w1.y", "a_w1.z", "a_w1.w",
Expand Down Expand Up @@ -449,6 +450,8 @@ void GenerateVertexShaderDX9(int prim, char *buffer, bool useHWTransform) {

if (poweredDiffuse) {
WRITE(p, " float dot%i = pow(dot(toLight, worldnormal), u_matspecular.a);\n", i);
// TODO: Somehow the NaN check from GLES seems unnecessary here?
// If it returned 0, it'd be wrong, so that's strange.
} else {
WRITE(p, " float dot%i = dot(toLight, worldnormal);\n", i);
}
Expand All @@ -464,6 +467,7 @@ void GenerateVertexShaderDX9(int prim, char *buffer, bool useHWTransform) {
WRITE(p, " lightScale = clamp(1.0 / dot(u_lightatt%i, float3(1.0, distance, distance*distance)), 0.0, 1.0);\n", i);
break;
case GE_LIGHTTYPE_SPOT:
case GE_LIGHTTYPE_UNKNOWN:
WRITE(p, " float angle%i = dot(normalize(u_lightdir%i), toLight);\n", i, i);
WRITE(p, " if (angle%i >= u_lightangle%i) {\n", i, i);
WRITE(p, " lightScale = clamp(1.0 / dot(u_lightatt%i, float3(1.0, distance, distance*distance)), 0.0, 1.0) * pow(angle%i, u_lightspotCoef%i);\n", i, i, i);
Expand Down Expand Up @@ -518,7 +522,7 @@ void GenerateVertexShaderDX9(int prim, char *buffer, bool useHWTransform) {
switch (gstate.getUVGenMode()) {
case GE_TEXMAP_TEXTURE_COORDS: // Scale-offset. Easy.
case GE_TEXMAP_UNKNOWN: // Not sure what this is, but Riviera uses it. Treating as coords works.
if (prescale) {
if (prescale && !flipV) {
if (hasTexcoord) {
WRITE(p, " Out.v_texcoord = In.texcoord;\n");
} else {
Expand Down Expand Up @@ -562,9 +566,9 @@ void GenerateVertexShaderDX9(int prim, char *buffer, bool useHWTransform) {
temp_tc = "float4(0.0, 0.0, 1.0, 1.0)";
break;
}
// Transform by texture matrix. XYZ as we are doing projection mapping.
WRITE(p, " Out.v_texcoord.xyz = mul(%s,u_texmtx) * float3(u_uvscaleoffset.xy, 1.0);\n", temp_tc.c_str());
}
// Transform by texture matrix. XYZ as we are doing projection mapping.
break;

case GE_TEXMAP_ENVIRONMENT_MAP: // Shade mapping - use dots from light sources.
Expand All @@ -576,17 +580,16 @@ void GenerateVertexShaderDX9(int prim, char *buffer, bool useHWTransform) {
break;
}

if (flipV)
// Will flip in the fragment for GE_TEXMAP_TEXTURE_MATRIX.
if (flipV && gstate.getUVGenMode() != GE_TEXMAP_TEXTURE_MATRIX)
WRITE(p, " Out.v_texcoord.y = 1.0 - Out.v_texcoord.y;\n");
}

// Compute fogdepth
if (enableFog)
WRITE(p, " Out.v_fogdepth.x = (viewPos.z + u_fogcoef.x) * u_fogcoef.y;\n");

}

// WRITE(p, "Out.gl_Position.z = (Out.gl_Position.z + Out.gl_Position.w) * 0.5f;");
WRITE(p, " return Out;\n");
WRITE(p, "}\n");
}
Expand Down

0 comments on commit 11a87b4

Please sign in to comment.