252 changes: 249 additions & 3 deletions Source/Core/VideoCommon/Src/LightingShaderGen.h
Expand Up @@ -5,9 +5,255 @@
#ifndef _LIGHTINGSHADERGEN_H_
#define _LIGHTINGSHADERGEN_H_

#include "CommonTypes.h"
#include "ShaderGenCommon.h"
#include "NativeVertexFormat.h"
#include "XFMemory.h"

int GetLightingShaderId(u32* out);
char *GenerateLightingShader(char *p, int components, const char* materialsName, const char* lightsName, const char* inColorName, const char* dest);
/**
 * Builds the shader expression that addresses the color entry of a light.
 * The emitted text has the form "<lightsName>[5*<index>].<swizzle>".
 *
 * @param lightsName name of the light constant array as it appears in the shader
 * @param index      light index; it is multiplied by 5 inside the emitted expression
 * @param swizzle    component selector appended after the dot (e.g. "xyz" or "w")
 * @return pointer to a function-local static buffer. The text is overwritten by the
 *         next call to this function and is truncated to fit the 32-byte buffer.
 */
static const char* LightCol(const char* lightsName, unsigned int index, const char* swizzle)
{
    static char result[32];
    // index is unsigned, so it has to be formatted with %u rather than %d.
    snprintf(result, sizeof(result), "%s[5*%u].%s", lightsName, index, swizzle);
    return result;
}

/**
 * Builds the shader expression "<lightsName>[5*<index>+1]", which the lighting
 * generator uses as the light's cosine attenuation entry.
 *
 * @param lightsName name of the light constant array as it appears in the shader
 * @param index      light index; it is multiplied by 5 inside the emitted expression
 * @return pointer to a function-local static buffer. The text is overwritten by the
 *         next call to this function and is truncated to fit the 32-byte buffer.
 */
static const char* LightCosAtt(const char* lightsName, unsigned int index)
{
    static char result[32];
    // index is unsigned, so it has to be formatted with %u rather than %d.
    snprintf(result, sizeof(result), "%s[5*%u+1]", lightsName, index);
    return result;
}

/**
 * Builds the shader expression "<lightsName>[5*<index>+2]", which the lighting
 * generator uses as the light's distance attenuation entry.
 *
 * @param lightsName name of the light constant array as it appears in the shader
 * @param index      light index; it is multiplied by 5 inside the emitted expression
 * @return pointer to a function-local static buffer. The text is overwritten by the
 *         next call to this function and is truncated to fit the 32-byte buffer.
 */
static const char* LightDistAtt(const char* lightsName, unsigned int index)
{
    static char result[32];
    // index is unsigned, so it has to be formatted with %u rather than %d.
    snprintf(result, sizeof(result), "%s[5*%u+2]", lightsName, index);
    return result;
}

/**
 * Builds the shader expression "<lightsName>[5*<index>+3]", which the lighting
 * generator uses as the light's position entry.
 *
 * @param lightsName name of the light constant array as it appears in the shader
 * @param index      light index; it is multiplied by 5 inside the emitted expression
 * @return pointer to a function-local static buffer. The text is overwritten by the
 *         next call to this function and is truncated to fit the 32-byte buffer.
 */
static const char* LightPos(const char* lightsName, unsigned int index)
{
    static char result[32];
    // index is unsigned, so it has to be formatted with %u rather than %d.
    snprintf(result, sizeof(result), "%s[5*%u+3]", lightsName, index);
    return result;
}

/**
 * Builds the shader expression "<lightsName>[5*<index>+4]", which the lighting
 * generator uses as the light's direction entry.
 *
 * @param lightsName name of the light constant array as it appears in the shader
 * @param index      light index; it is multiplied by 5 inside the emitted expression
 * @return pointer to a function-local static buffer. The text is overwritten by the
 *         next call to this function and is truncated to fit the 32-byte buffer.
 */
static const char* LightDir(const char* lightsName, unsigned int index)
{
    static char result[32];
    // index is unsigned, so it has to be formatted with %u rather than %d.
    snprintf(result, sizeof(result), "%s[5*%u+4]", lightsName, index);
    return result;
}

/**
 * Emits the shader statements that accumulate one light into "lacc" for one lit channel,
 * and records the channel's attenuation/diffuse functions in uid_data.
 *
 * @param object        generator target; only its Write() method is used here
 * @param uid_data      receives attnfunc/diffusefunc, two bits each at bit 2*litchan_index
 * @param index         light index forwarded to the Light* expression helpers
 * @param litchan_index 0/1 select xfregs.color[0/1], 2/3 select xfregs.alpha[0/1]
 * @param lightsName    name of the light constant array in the shader
 * @param coloralpha    1 => write "xyz", 2 => write "w", anything else => write "xyzw"
 */
template<class T>
static void GenerateLightShader(T& object, LightingUidData& uid_data, int index, int litchan_index, const char* lightsName, int coloralpha)
{
    const bool use_alpha_channel = litchan_index > 1;
    const LitChannel& chan = use_alpha_channel ? xfregs.alpha[litchan_index-2] : xfregs.color[litchan_index];

    // Which components of lacc this light contributes to.
    const char* swizzle;
    switch (coloralpha)
    {
    case 1:
        swizzle = "xyz";
        break;
    case 2:
        swizzle = "w";
        break;
    default:
        swizzle = "xyzw";
        break;
    }

    const int uid_shift = 2*litchan_index;
    uid_data.attnfunc |= chan.attnfunc << uid_shift;
    uid_data.diffusefunc |= chan.diffusefunc << uid_shift;

    const bool attenuation_enabled = (chan.attnfunc & 1) != 0;
    if (attenuation_enabled)
    {
        // spec and spot: first compute "ldir" and "attn" ...
        if (chan.attnfunc == 3)
        {
            // spot
            object.Write("ldir = %s.xyz - pos.xyz;\n", LightPos(lightsName, index));
            object.Write("dist2 = dot(ldir, ldir);\n"
                         "dist = sqrt(dist2);\n"
                         "ldir = ldir / dist;\n"
                         "attn = max(0.0f, dot(ldir, %s.xyz));\n", LightDir(lightsName, index));
            object.Write("attn = max(0.0f, dot(%s.xyz, float3(1.0f, attn, attn*attn))) / dot(%s.xyz, float3(1.0f,dist,dist2));\n",
                         LightCosAtt(lightsName, index), LightDistAtt(lightsName, index));
        }
        else if (chan.attnfunc == 1)
        {
            // specular
            object.Write("ldir = normalize(%s.xyz);\n", LightPos(lightsName, index));
            object.Write("attn = (dot(_norm0,ldir) >= 0.0f) ? max(0.0f, dot(_norm0, %s.xyz)) : 0.0f;\n", LightDir(lightsName, index));
            object.Write("attn = max(0.0f, dot(%s.xyz, float3(1,attn,attn*attn))) / dot(%s.xyz, float3(1,attn,attn*attn));\n",
                         LightCosAtt(lightsName, index), LightDistAtt(lightsName, index));
        }

        // ... then scale the light color by attn (and by the diffuse term, if any).
        switch (chan.diffusefunc)
        {
        case LIGHTDIF_NONE:
            object.Write("lacc.%s += attn * %s;\n", swizzle, LightCol(lightsName, index, swizzle));
            break;
        case LIGHTDIF_SIGN:
        case LIGHTDIF_CLAMP:
        {
            // SIGN keeps the signed dot product, CLAMP cuts it off at zero.
            const char* dot_prefix = (chan.diffusefunc == LIGHTDIF_SIGN) ? "(" : "max(0.0f,";
            object.Write("lacc.%s += attn * %sdot(ldir, _norm0)) * %s;\n",
                         swizzle, dot_prefix, LightCol(lightsName, index, swizzle));
            break;
        }
        default:
            _assert_(0);
        }
    }
    else
    {
        // atten disabled
        switch (chan.diffusefunc)
        {
        case LIGHTDIF_NONE:
            object.Write("lacc.%s += %s;\n", swizzle, LightCol(lightsName, index, swizzle));
            break;
        case LIGHTDIF_SIGN:
        case LIGHTDIF_CLAMP:
        {
            // SIGN keeps the signed dot product, CLAMP cuts it off at zero.
            const char* dot_prefix = (chan.diffusefunc == LIGHTDIF_SIGN) ? "(" : "max(0.0f,";
            object.Write("ldir = normalize(%s.xyz - pos.xyz);\n", LightPos(lightsName, index));
            object.Write("lacc.%s += %sdot(ldir, _norm0)) * %s;\n",
                         swizzle, dot_prefix, LightCol(lightsName, index, swizzle));
            break;
        }
        default:
            _assert_(0);
        }
    }

    object.Write("\n");
}

// Emits the per-channel lighting code and records the matching uid bits.
//
// For every enabled color channel j (j < xfregs.numChan.numColorChans) this writes one
// braced shader scope that
//   1. selects the material color "mat" (vertex color or materialsName[j+2]),
//   2. initialises the light accumulator "lacc" (vertex color, materialsName[j] or 1.0),
//   3. overrides mat.w / lacc.w from the alpha channel settings,
//   4. adds every enabled light through GenerateLightShader,
//   5. stores "mat * clamp(lacc, 0.0, 1.0)" into <dest><j>.
//
// uid_data bit layout written here: color channel j uses bit j and alpha channel j uses
// bit j+2 of matsource/enablelighting/ambsource; attnfunc/diffusefunc use two bits per
// entry (shift 2*j and 2*(j+2)); light_mask uses eight bits per entry (shift 8*j and 8*(j+2)).
//
// Parameter notes (from the original comment):
// materials name is I_MATERIALS in vs and I_PMATERIALS in ps
// inColorName is color in vs and colors_ in ps
// dest is o.colors_ in vs and colors_ in ps
// components is tested against VB_HAS_COL0 << j to see whether the vertex supplies color j.
template<class T>
static void GenerateLightingShader(T& object, LightingUidData& uid_data, int components, const char* materialsName, const char* lightsName, const char* inColorName, const char* dest)
{
for (unsigned int j = 0; j < xfregs.numChan.numColorChans; j++)
{
const LitChannel& color = xfregs.color[j];
const LitChannel& alpha = xfregs.alpha[j];

// Each channel gets its own scope in the generated shader.
object.Write("{\n");

// --- material color (all four components, alpha may be overridden below) ---
uid_data.matsource |= xfregs.color[j].matsource << j;
if (color.matsource) // from vertex
{
// Prefer vertex color j, fall back to vertex color 0, then to opaque white.
if (components & (VB_HAS_COL0 << j))
object.Write("mat = %s%d;\n", inColorName, j);
else if (components & VB_HAS_COL0)
object.Write("mat = %s0;\n", inColorName);
else
object.Write("mat = float4(1.0f, 1.0f, 1.0f, 1.0f);\n");
}
else // from color
{
// Material entries are read from indices j+2 of the materials array.
object.Write("mat = %s[%d];\n", materialsName, j+2);
}

// --- ambient / initial value of the light accumulator ---
uid_data.enablelighting |= xfregs.color[j].enablelighting << j;
if (color.enablelighting)
{
// ambsource is only recorded in the uid when lighting is enabled for this channel.
uid_data.ambsource |= xfregs.color[j].ambsource << j;
if (color.ambsource) // from vertex
{
// Same fallback chain as for the material, but ending in black.
if (components & (VB_HAS_COL0<<j) )
object.Write("lacc = %s%d;\n", inColorName, j);
else if (components & VB_HAS_COL0 )
object.Write("lacc = %s0;\n", inColorName);
else
object.Write("lacc = float4(0.0f, 0.0f, 0.0f, 0.0f);\n");
}
else // from color
{
// Ambient entries are read from indices j of the materials array.
object.Write("lacc = %s[%d];\n", materialsName, j);
}
}
else
{
// Lighting disabled: an accumulator of 1 leaves the material color unchanged.
object.Write("lacc = float4(1.0f, 1.0f, 1.0f, 1.0f);\n");
}

// check if alpha is different
// (mat.w only needs to be rewritten when the alpha channel uses another material source)
uid_data.matsource |= xfregs.alpha[j].matsource << (j+2);
if (alpha.matsource != color.matsource)
{
if (alpha.matsource) // from vertex
{
if (components & (VB_HAS_COL0<<j))
object.Write("mat.w = %s%d.w;\n", inColorName, j);
else if (components & VB_HAS_COL0)
object.Write("mat.w = %s0.w;\n", inColorName);
else object.Write("mat.w = 1.0f;\n");
}
else // from color
{
object.Write("mat.w = %s[%d].w;\n", materialsName, j+2);
}
}

// --- alpha component of the light accumulator (always written) ---
uid_data.enablelighting |= xfregs.alpha[j].enablelighting << (j+2);
if (alpha.enablelighting)
{
uid_data.ambsource |= xfregs.alpha[j].ambsource << (j+2);
if (alpha.ambsource) // from vertex
{
if (components & (VB_HAS_COL0<<j) )
object.Write("lacc.w = %s%d.w;\n", inColorName, j);
else if (components & VB_HAS_COL0 )
object.Write("lacc.w = %s0.w;\n", inColorName);
else
object.Write("lacc.w = 0.0f;\n");
}
else // from color
{
object.Write("lacc.w = %s[%d].w;\n", materialsName, j);
}
}
else
{
object.Write("lacc.w = 1.0f;\n");
}

// --- per-light contributions ---
if(color.enablelighting && alpha.enablelighting)
{
// both have lighting, test if they use the same lights
// mask stays 0 unless both channels share identical light parameters.
int mask = 0;
uid_data.attnfunc |= color.attnfunc << (2*j);
uid_data.attnfunc |= alpha.attnfunc << (2*(j+2));
uid_data.diffusefunc |= color.diffusefunc << (2*j);
uid_data.diffusefunc |= alpha.diffusefunc << (2*(j+2));
uid_data.light_mask |= color.GetFullLightMask() << (8*j);
uid_data.light_mask |= alpha.GetFullLightMask() << (8*(j+2));
if(color.lightparams == alpha.lightparams)
{
mask = color.GetFullLightMask() & alpha.GetFullLightMask();
if(mask)
{
for (int i = 0; i < 8; ++i)
{
if (mask & (1<<i))
{
// Shared light: one call with coloralpha == 3 updates all four components.
GenerateLightShader<T>(object, uid_data, i, j, lightsName, 3);
}
}
}
}

// no shared lights
// Lights not covered by mask are emitted separately for color (1) and alpha (2).
for (int i = 0; i < 8; ++i)
{
if (!(mask&(1<<i)) && (color.GetFullLightMask() & (1<<i)))
GenerateLightShader<T>(object, uid_data, i, j, lightsName, 1);
if (!(mask&(1<<i)) && (alpha.GetFullLightMask() & (1<<i)))
GenerateLightShader<T>(object, uid_data, i, j+2, lightsName, 2);
}
}
else if (color.enablelighting || alpha.enablelighting)
{
// lights are disabled on one channel so process only the active ones
const LitChannel& workingchannel = color.enablelighting ? color : alpha;
// lit_index follows the uid convention: j for the color channel, j+2 for the alpha channel.
const int lit_index = color.enablelighting ? j : (j+2);
int coloralpha = color.enablelighting ? 1 : 2;

uid_data.light_mask |= workingchannel.GetFullLightMask() << (8*lit_index);
for (int i = 0; i < 8; ++i)
{
if (workingchannel.GetFullLightMask() & (1<<i))
GenerateLightShader<T>(object, uid_data, i, lit_index, lightsName, coloralpha);
}
}
// Final channel output; the accumulated light is clamped to [0, 1] before modulating the material.
object.Write("%s%d = mat * clamp(lacc, 0.0, 1.0);\n", dest, j);
object.Write("}\n");
}
}

#endif // _LIGHTINGSHADERGEN_H_
984 changes: 426 additions & 558 deletions Source/Core/VideoCommon/Src/PixelShaderGen.cpp

Large diffs are not rendered by default.

193 changes: 123 additions & 70 deletions Source/Core/VideoCommon/Src/PixelShaderGen.h
Expand Up @@ -6,6 +6,8 @@
#define GCOGL_PIXELSHADER_H

#include "VideoCommon.h"
#include "ShaderGenCommon.h"
#include "BPMemory.h"

#define I_COLORS "color"
#define I_KCOLORS "k"
Expand All @@ -31,8 +33,14 @@
#define C_PLIGHTS (C_FOG + 3)
#define C_PMATERIALS (C_PLIGHTS + 40)
#define C_PENVCONST_END (C_PMATERIALS + 4)
#define PIXELSHADERUID_MAX_VALUES 70
#define PIXELSHADERUID_MAX_VALUES_SAFE 116

// Different ways to achieve rendering with destination alpha.
// The enumerators rely on their implicit values (0, 1, 2).
// NOTE(review): pixel_shader_uid_data declares a 2-bit "dstAlphaMode" field that presumably
// stores one of these values - confirm before adding a fifth enumerator.
enum DSTALPHA_MODE
{
DSTALPHA_NONE, // Render normally, without destination alpha
DSTALPHA_ALPHA_PASS, // Render normally first, then render again for alpha
DSTALPHA_DUAL_SOURCE_BLEND // Use dual-source blending
};

// Annoying sure, can be removed once we get up to GLSL ~1.3
const s_svar PSVar_Loc[] = { {I_COLORS, C_COLORS, 4 },
Expand All @@ -47,90 +55,135 @@ const s_svar PSVar_Loc[] = { {I_COLORS, C_COLORS, 4 },
{I_PMATERIALS, C_PMATERIALS, 4 },
};

// DO NOT make anything in this class virtual.
template<bool safe>
class _PIXELSHADERUID
// TODO: Should compact packing be enabled?
//#pragma pack(4)
struct pixel_shader_uid_data
{
public:
u32 values[safe ? PIXELSHADERUID_MAX_VALUES_SAFE : PIXELSHADERUID_MAX_VALUES];
int num_values;
// TODO: Optimize field order for easy access!

_PIXELSHADERUID()
{
}
u32 components;
u32 dstAlphaMode : 2;
u32 Pretest : 2;

_PIXELSHADERUID(const _PIXELSHADERUID& r)
{
num_values = r.num_values;
u32 genMode_numtexgens : 4;
u32 genMode_numtevstages : 4;
u32 genMode_numindstages : 3;

if (safe)
memcpy(values, r.values, PIXELSHADERUID_MAX_VALUES_SAFE);
else
memcpy(values, r.values, r.GetNumValues() * sizeof(values[0]));
}
u32 nIndirectStagesUsed : 8;

int GetNumValues() const
{
if (safe)
return (sizeof(values) / sizeof(u32));
else
return num_values;
}
u32 texMtxInfo_n_projection : 8; // 8x1 bit

bool operator <(const _PIXELSHADERUID& _Right) const
u32 tevindref_bi0 : 3;
u32 tevindref_bc0 : 3;
u32 tevindref_bi1 : 3;
u32 tevindref_bc1 : 3;
u32 tevindref_bi2 : 3;
u32 tevindref_bc3 : 3;
u32 tevindref_bi4 : 3;
u32 tevindref_bc4 : 3;
inline void SetTevindrefValues(int index, u32 texcoord, u32 texmap)
{
int N = GetNumValues();

if (N < _Right.GetNumValues())
return true;
else if (N > _Right.GetNumValues())
return false;

for (int i = 0; i < N; ++i)
{
if (values[i] < _Right.values[i])
return true;
else if (values[i] > _Right.values[i])
return false;
}

return false;
if (index == 0) { tevindref_bc0 = texcoord; tevindref_bi0 = texmap; }
else if (index == 1) { tevindref_bc1 = texcoord; tevindref_bi1 = texmap; }
else if (index == 2) { tevindref_bc3 = texcoord; tevindref_bi2 = texmap; }
else if (index == 3) { tevindref_bc4 = texcoord; tevindref_bi4 = texmap; }
}

bool operator ==(const _PIXELSHADERUID& _Right) const
inline void SetTevindrefTexmap(int index, u32 texmap)
{
int N = GetNumValues();
if (index == 0) { tevindref_bi0 = texmap; }
else if (index == 1) { tevindref_bi1 = texmap; }
else if (index == 2) { tevindref_bi2 = texmap; }
else if (index == 3) { tevindref_bi4 = texmap; }
}

if (N != _Right.GetNumValues())
return false;
u64 tevorders_n_texcoord : 48; // 16 x 3 bits

for (int i = 0; i < N; ++i)
{
if (values[i] != _Right.values[i])
return false;
}
u64 tevind_n_sw : 48; // 16 x 3 bits
u64 tevind_n_tw : 48; // 16 x 3 bits
u32 tevind_n_fb_addprev : 16; // 16 x 1 bit
u32 tevind_n_bs : 32; // 16 x 2 bits
u32 tevind_n_fmt : 32; // 16 x 2 bits
u32 tevind_n_bt : 32; // 16 x 2 bits
u64 tevind_n_bias : 48; // 16 x 3 bits
u64 tevind_n_mid : 64; // 16 x 4 bits

return true;
// NOTE: These assume that the affected bits are zero before calling
// ORs val into tevind_n_sw at bit offset 3*index (one 3-bit slot per index).
// Nothing is cleared first, so the slot must still be zero when this is called.
void Set_tevind_sw(int index, u64 val)
{
    const int bit_offset = 3*index;
    tevind_n_sw |= (val << bit_offset);
}
// ORs val into tevind_n_tw at bit offset 3*index (one 3-bit slot per index).
// Nothing is cleared first, so the slot must still be zero when this is called.
void Set_tevind_tw(int index, u64 val)
{
    const int bit_offset = 3*index;
    tevind_n_tw |= (val << bit_offset);
}
// ORs val into tevind_n_bias at bit offset 3*index (one 3-bit slot per index).
// Nothing is cleared first, so the slot must still be zero when this is called.
void Set_tevind_bias(int index, u64 val)
{
    const int bit_offset = 3*index;
    tevind_n_bias |= (val << bit_offset);
}
// ORs val into tevind_n_mid at bit offset 4*index (one 4-bit slot per index).
// Nothing is cleared first, so the slot must still be zero when this is called.
void Set_tevind_mid(int index, u64 val)
{
    const int bit_offset = 4*index;
    tevind_n_mid |= (val << bit_offset);
}
};

typedef _PIXELSHADERUID<false> PIXELSHADERUID;
typedef _PIXELSHADERUID<true> PIXELSHADERUIDSAFE;

// Different ways to achieve rendering with destination alpha
enum DSTALPHA_MODE
{
DSTALPHA_NONE, // Render normally, without destination alpha
DSTALPHA_ALPHA_PASS, // Render normally first, then render again for alpha
DSTALPHA_DUAL_SOURCE_BLEND // Use dual-source blending
u32 tevksel_n_swap1 : 16; // 8x2 bits
u32 tevksel_n_swap2 : 16; // 8x2 bits
u64 tevksel_n_kcsel0 : 40; // 8x5 bits
u64 tevksel_n_kasel0 : 40; // 8x5 bits
u64 tevksel_n_kcsel1 : 40; // 8x5 bits
u64 tevksel_n_kasel1 : 40; // 8x5 bits
// ORs value into the 5-bit slot `index` of tevksel_n_kcsel1 (i != 0) or tevksel_n_kcsel0 (i == 0).
// Nothing is cleared first, so the slot must still be zero when this is called.
void set_tevksel_kcsel(int index, int i, u64 value)
{
    const int bit_offset = 5*index;
    if (i)
        tevksel_n_kcsel1 |= (value << bit_offset);
    else
        tevksel_n_kcsel0 |= (value << bit_offset);
}
// ORs value into the 5-bit slot `index` of tevksel_n_kasel1 (i != 0) or tevksel_n_kasel0 (i == 0).
// Nothing is cleared first, so the slot must still be zero when this is called.
void set_tevksel_kasel(int index, int i, u64 value)
{
    const int bit_offset = 5*index;
    if (i)
        tevksel_n_kasel1 |= (value << bit_offset);
    else
        tevksel_n_kasel0 |= (value << bit_offset);
}

u64 cc_n_d : 64; // 16x4 bits
u64 cc_n_c : 64; // 16x4 bits
u64 cc_n_b : 64; // 16x4 bits
u64 cc_n_a : 64; // 16x4 bits
u32 cc_n_bias : 32; // 16x2 bits
u32 cc_n_op : 16; // 16x1 bit
u32 cc_n_clamp : 16; // 16x1 bit
u32 cc_n_shift : 32; // 16x2 bits
u32 cc_n_dest : 32; // 16x2 bits

u32 ac_n_rswap : 32; // 16x2 bits
u32 ac_n_tswap : 32; // 16x2 bits
u64 ac_n_d : 48; // 16x3 bits
u64 ac_n_c : 48; // 16x3 bits
u64 ac_n_b : 48; // 16x3 bits
u64 ac_n_a : 48; // 16x3 bits
u32 ac_n_bias : 32; // 16x2 bits
u32 ac_n_op : 16; // 16x1 bit
u32 ac_n_clamp : 16; // 16x1 bit
u32 ac_n_shift : 32; // 16x2 bits
u32 ac_n_dest : 32; // 16x2 bits

u32 alpha_test_comp0 : 3;
u32 alpha_test_comp1 : 3;
u32 alpha_test_logic : 2;
u32 alpha_test_use_zcomploc_hack : 1;

u32 fog_proj : 1;
u32 fog_fsel : 3;
u32 fog_RangeBaseEnabled : 1;

u32 ztex_op : 2;

u32 fast_depth_calc : 1;
u32 per_pixel_depth : 1;
u32 bHasIndStage : 16;

u32 xfregs_numTexGen_numTexGens : 4;

// TODO: I think we're fine without an enablePixelLighting field, should probably double check, though..
LightingUidData lighting;
};
//#pragma pack()

const char *GeneratePixelShaderCode(DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType, u32 components);

void GetPixelShaderId(PIXELSHADERUID *uid, DSTALPHA_MODE dstAlphaMode, u32 components);
void GetSafePixelShaderId(PIXELSHADERUIDSAFE *uid, DSTALPHA_MODE dstAlphaMode, u32 components);
typedef ShaderUid<pixel_shader_uid_data> PixelShaderUid;
typedef ShaderCode PixelShaderCode; // TODO: Obsolete
typedef ShaderConstantProfile PixelShaderConstantProfile; // TODO: Obsolete

// Used to make sure that our optimized pixel shader IDs don't lose any possible shader code changes
void ValidatePixelShaderIDs(API_TYPE api, PIXELSHADERUIDSAFE old_id, const std::string& old_code, DSTALPHA_MODE dstAlphaMode, u32 components);
void GeneratePixelShaderCode(PixelShaderCode& object, DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType, u32 components);
void GetPixelShaderUid(PixelShaderUid& object, DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType, u32 components);
void GetPixelShaderConstantProfile(PixelShaderConstantProfile& object, DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType, u32 components);

#endif // GCOGL_PIXELSHADER_H
176 changes: 119 additions & 57 deletions Source/Core/VideoCommon/Src/PixelShaderManager.cpp
Expand Up @@ -29,19 +29,45 @@ static u32 lastTexDims[8]; // width | height << 16 | wrap_s << 28 | wrap_t << 30
static u32 lastZBias;
static int nMaterialsChanged;

static float s_constant_cache[C_PENVCONST_END*4];

// Sends one float4 pixel shader constant to the renderer and mirrors the four
// components in s_constant_cache (four floats per constant slot).
inline void SetPSConstant4f(unsigned int const_number, float f1, float f2, float f3, float f4)
{
    const unsigned int base = const_number*4;

    // The "skip if the cached value is unchanged" early-out is currently disabled:
    //	if (s_constant_cache[base] == f1 && s_constant_cache[base+1] == f2 &&
    //		s_constant_cache[base+2] == f3 && s_constant_cache[base+3] == f4)
    //		return;

    g_renderer->SetPSConstant4f(const_number, f1, f2, f3, f4);

    s_constant_cache[base] = f1;
    s_constant_cache[base + 1] = f2;
    s_constant_cache[base + 2] = f3;
    s_constant_cache[base + 3] = f4;
}

// Sends one float4 pixel shader constant (read from f[0..3]) to the renderer and
// mirrors the four components in s_constant_cache.
inline void SetPSConstant4fv(unsigned int const_number, const float *f)
{
    const unsigned int base = const_number*4;

    // The "skip if the cached value is unchanged" early-out is currently disabled:
    //	if (s_constant_cache[base] == f[0] && s_constant_cache[base+1] == f[1] &&
    //		s_constant_cache[base+2] == f[2] && s_constant_cache[base+3] == f[3])
    //		return;

    g_renderer->SetPSConstant4fv(const_number, f);

    for (unsigned int component = 0; component < 4; ++component)
        s_constant_cache[base + component] = f[component];
}

// Sends `count` consecutive float4 pixel shader constants (4*count floats read from f)
// to the renderer and mirrors them in s_constant_cache starting at slot const_number.
inline void SetMultiPSConstant4fv(unsigned int const_number, unsigned int count, const float *f)
{
    const unsigned int base = const_number*4;
    const unsigned int num_floats = 4*count;

    // The "skip if every cached value is unchanged" early-out is currently disabled:
    //	for (unsigned int i = 0; i < num_floats; ++i)
    //		if (s_constant_cache[base+i] != f[i])
    //			break;
    //		else if (i == num_floats-1)
    //			return;

    g_renderer->SetMultiPSConstant4fv(const_number, count, f);

    unsigned int i = 0;
    while (i < num_floats)
    {
        s_constant_cache[base + i] = f[i];
        ++i;
    }
}

void PixelShaderManager::Init()
Expand All @@ -50,6 +76,7 @@ void PixelShaderManager::Init()
memset(lastTexDims, 0, sizeof(lastTexDims));
lastZBias = 0;
memset(lastRGBAfull, 0, sizeof(lastRGBAfull));
memset(s_constant_cache, 0, sizeof(s_constant_cache)); // TODO: Should reflect that on the GPU side....
Dirty();
}

Expand All @@ -70,42 +97,62 @@ void PixelShaderManager::Shutdown()

}

void PixelShaderManager::SetConstants()
void PixelShaderManager::SetConstants(u32 components)
{
if (g_ActiveConfig.backend_info.APIType == API_OPENGL && !g_ActiveConfig.backend_info.bSupportsGLSLUBO)
Dirty();

// TODO: Probably broken in the non-UBO path
PixelShaderConstantProfile constant_profile(C_PENVCONST_END);
/// TODO: dst alpha/api/components type parameter...
GetPixelShaderConstantProfile(constant_profile, DSTALPHA_DUAL_SOURCE_BLEND, API_OPENGL, components);

static int saved_updates = 0;
static int necessary_updates = 0;

// TODO: Remove this!
#define IncStuff() { \
saved_updates++; \
/*printf("Saved a constant update at line %d! Saved %d against %d now!\n", __LINE__, saved_updates, necessary_updates);*/ }

for (int i = 0; i < 2; ++i)
{
if (s_nColorsChanged[i])
{
int baseind = i ? C_KCOLORS : C_COLORS;
for (int j = 0; j < 4; ++j)
{
if (s_nColorsChanged[i] & (1 << j))
if ((s_nColorsChanged[i] & (1 << j)) && constant_profile.ConstantIsUsed(baseind+j))
{
SetPSConstant4fv(baseind+j, &lastRGBAfull[i][j][0]);
s_nColorsChanged[i] &= ~(1<<j);
++necessary_updates;
} else if ((s_nColorsChanged[i] & (1 << j))) IncStuff();
}
s_nColorsChanged[i] = 0;
}
}

if (s_nTexDimsChanged)
if (s_nTexDimsChanged)
{
for (int i = 0; i < 8; ++i)
{
if (s_nTexDimsChanged & (1<<i))
if ((s_nTexDimsChanged & (1<<i)) && constant_profile.ConstantIsUsed(C_TEXDIMS+i))
{
++necessary_updates;
SetPSTextureDims(i);
}
s_nTexDimsChanged = 0;
}
s_nTexDimsChanged &= ~(1<<i);
}else if (s_nTexDimsChanged & (1<<i)) IncStuff();
}
}

if (s_bAlphaChanged)
if (s_bAlphaChanged && constant_profile.ConstantIsUsed(C_ALPHA))
{
++necessary_updates;
SetPSConstant4f(C_ALPHA, (lastAlpha&0xff)/255.0f, ((lastAlpha>>8)&0xff)/255.0f, 0, ((lastAlpha>>16)&0xff)/255.0f);
s_bAlphaChanged = false;
}
} else if (s_bAlphaChanged) IncStuff();

if (s_bZTextureTypeChanged)
if (s_bZTextureTypeChanged && constant_profile.ConstantIsUsed(C_ZBIAS))
{
float ftemp[4];
switch (bpmem.ztex2.type)
Expand All @@ -121,13 +168,14 @@ void PixelShaderManager::SetConstants()
case 2:
// 24 bits
ftemp[0] = 16711680.0f/16777215.0f; ftemp[1] = 65280.0f/16777215.0f; ftemp[2] = 255.0f/16777215.0f; ftemp[3] = 0;
break;
}
break;
}
++necessary_updates;
SetPSConstant4fv(C_ZBIAS, ftemp);
s_bZTextureTypeChanged = false;
}
} else if (s_bZTextureTypeChanged) IncStuff();

if (s_bZBiasChanged || s_bDepthRangeChanged)
if ((s_bZBiasChanged || s_bDepthRangeChanged) && constant_profile.ConstantIsUsed(C_ZBIAS+1))
{
// reversed gxsetviewport(xorig, yorig, width, height, nearz, farz)
// [0] = width/2
Expand All @@ -138,55 +186,62 @@ void PixelShaderManager::SetConstants()
// [5] = 16777215 * farz

//ERROR_LOG("pixel=%x,%x, bias=%x\n", bpmem.zcontrol.pixel_format, bpmem.ztex2.type, lastZBias);
++necessary_updates;
SetPSConstant4f(C_ZBIAS+1, xfregs.viewport.farZ / 16777216.0f, xfregs.viewport.zRange / 16777216.0f, 0, (float)(lastZBias)/16777215.0f);
s_bZBiasChanged = s_bDepthRangeChanged = false;
}
}else if ((s_bZBiasChanged || s_bDepthRangeChanged)) IncStuff();

// indirect incoming texture scales
if (s_nIndTexScaleChanged)
{
// set as two sets of vec4s, each containing S and T of two ind stages.
float f[8];

if (s_nIndTexScaleChanged & 0x03)
if ((s_nIndTexScaleChanged & 0x03) && constant_profile.ConstantIsUsed(C_INDTEXSCALE))
{
for (u32 i = 0; i < 2; ++i)
{
f[2 * i] = bpmem.texscale[0].getScaleS(i & 1);
f[2 * i + 1] = bpmem.texscale[0].getScaleT(i & 1);
PRIM_LOG("tex indscale%d: %f %f\n", i, f[2 * i], f[2 * i + 1]);
}
f[2 * i] = bpmem.texscale[0].getScaleS(i & 1);
f[2 * i + 1] = bpmem.texscale[0].getScaleT(i & 1);
PRIM_LOG("tex indscale%d: %f %f\n", i, f[2 * i], f[2 * i + 1]);
}
++necessary_updates;
SetPSConstant4fv(C_INDTEXSCALE, f);
}
s_nIndTexScaleChanged &= ~0x03;
}
else if ((s_nIndTexScaleChanged & 0x03)) IncStuff();

if (s_nIndTexScaleChanged & 0x0c)
if ((s_nIndTexScaleChanged & 0x0c) && constant_profile.ConstantIsUsed(C_INDTEXSCALE+1))
{
for (u32 i = 2; i < 4; ++i)
for (u32 i = 2; i < 4; ++i)
{
f[2 * i] = bpmem.texscale[1].getScaleS(i & 1);
f[2 * i + 1] = bpmem.texscale[1].getScaleT(i & 1);
PRIM_LOG("tex indscale%d: %f %f\n", i, f[2 * i], f[2 * i + 1]);
}
f[2 * i] = bpmem.texscale[1].getScaleS(i & 1);
f[2 * i + 1] = bpmem.texscale[1].getScaleT(i & 1);
PRIM_LOG("tex indscale%d: %f %f\n", i, f[2 * i], f[2 * i + 1]);
}
++necessary_updates;
SetPSConstant4fv(C_INDTEXSCALE+1, &f[4]);
}

s_nIndTexScaleChanged = 0;
}
s_nIndTexScaleChanged &= ~0x0c;
}
else if ((s_nIndTexScaleChanged & 0x0c)) IncStuff();
}

if (s_nIndTexMtxChanged)
{
for (int i = 0; i < 3; ++i)
{
if (s_nIndTexMtxChanged & (1 << i))
if ((s_nIndTexMtxChanged & (1 << i)) && (constant_profile.ConstantIsUsed(C_INDTEXMTX+2*i) || constant_profile.ConstantIsUsed(C_INDTEXMTX+2*i+1)))
{
int scale = ((u32)bpmem.indmtx[i].col0.s0 << 0) |
((u32)bpmem.indmtx[i].col1.s1 << 2) |
((u32)bpmem.indmtx[i].col2.s2 << 4);
float fscale = powf(2.0f, (float)(scale - 17)) / 1024.0f;

// xyz - static matrix
// TODO w - dynamic matrix scale / 256...... somehow / 4 works better
// rev 2972 - now using / 256.... verify that this works
int scale = ((u32)bpmem.indmtx[i].col0.s0 << 0) |
((u32)bpmem.indmtx[i].col1.s1 << 2) |
((u32)bpmem.indmtx[i].col2.s2 << 4);
float fscale = powf(2.0f, (float)(scale - 17)) / 1024.0f;

// xyz - static matrix
// TODO w - dynamic matrix scale / 256...... somehow / 4 works better
// rev 2972 - now using / 256.... verify that this works
++necessary_updates;
++necessary_updates;
SetPSConstant4f(C_INDTEXMTX + 2 * i,
bpmem.indmtx[i].col0.ma * fscale,
bpmem.indmtx[i].col1.mc * fscale,
Expand All @@ -198,23 +253,26 @@ void PixelShaderManager::SetConstants()
bpmem.indmtx[i].col2.mf * fscale,
fscale * 4.0f);

PRIM_LOG("indmtx%d: scale=%f, mat=(%f %f %f; %f %f %f)\n",
i, 1024.0f*fscale,
bpmem.indmtx[i].col0.ma * fscale, bpmem.indmtx[i].col1.mc * fscale, bpmem.indmtx[i].col2.me * fscale,
bpmem.indmtx[i].col0.mb * fscale, bpmem.indmtx[i].col1.md * fscale, bpmem.indmtx[i].col2.mf * fscale);
}
}
s_nIndTexMtxChanged = 0;
}
PRIM_LOG("indmtx%d: scale=%f, mat=(%f %f %f; %f %f %f)\n",
i, 1024.0f*fscale,
bpmem.indmtx[i].col0.ma * fscale, bpmem.indmtx[i].col1.mc * fscale, bpmem.indmtx[i].col2.me * fscale,
bpmem.indmtx[i].col0.mb * fscale, bpmem.indmtx[i].col1.md * fscale, bpmem.indmtx[i].col2.mf * fscale);

s_nIndTexMtxChanged &= ~(1 << i);
}else if ((s_nIndTexMtxChanged & (1 << i))) {IncStuff();IncStuff();}
}
}

if (s_bFogColorChanged)
if (s_bFogColorChanged && constant_profile.ConstantIsUsed(C_FOG))
{
++necessary_updates;
SetPSConstant4f(C_FOG, bpmem.fog.color.r / 255.0f, bpmem.fog.color.g / 255.0f, bpmem.fog.color.b / 255.0f, 0);
s_bFogColorChanged = false;
}
}else if (s_bFogColorChanged) IncStuff();

if (s_bFogParamChanged)
if (s_bFogParamChanged && constant_profile.ConstantIsUsed(C_FOG+1))
{
++necessary_updates;
if(!g_ActiveConfig.bDisableFog)
{
//downscale magnitude to 0.24 bits
Expand All @@ -226,11 +284,12 @@ void PixelShaderManager::SetConstants()
else
SetPSConstant4f(C_FOG + 1, 0.0, 1.0, 0.0, 1.0);

s_bFogParamChanged = false;
}
s_bFogParamChanged = false;
}else if ( s_bFogParamChanged) IncStuff();

if (s_bFogRangeAdjustChanged)
if (s_bFogRangeAdjustChanged && constant_profile.ConstantIsUsed(C_FOG+2))
{
++necessary_updates;
if(!g_ActiveConfig.bDisableFog && bpmem.fogRange.Base.Enabled == 1)
{
//bpmem.fogRange.Base.Center : center of the viewport in x axis. observation: bpmem.fogRange.Base.Center = realcenter + 342;
Expand All @@ -251,8 +310,9 @@ void PixelShaderManager::SetConstants()
}

s_bFogRangeAdjustChanged = false;
}
}else if ( s_bFogRangeAdjustChanged) IncStuff();

// TODO: use constant profile here!
if (g_ActiveConfig.bEnablePixelLighting && g_ActiveConfig.backend_info.bSupportsPixelLighting) // config check added because the code in here was crashing for me inside SetPSConstant4f
{
if (nLightsChanged[0] >= 0)
Expand Down Expand Up @@ -349,8 +409,10 @@ void PixelShaderManager::SetPSTextureDims(int texid)
SetPSConstant4fv(C_TEXDIMS + texid, fdims);
}

// This one is high in profiles (0.5%). TODO: Move conversion out, only store the raw color value
// This one is high in profiles (0.5%).
// TODO: Move conversion out, only store the raw color value
// and update it when the shader constant is set, only.
// TODO: Conversion should be checked in the context of tev_fixes..
void PixelShaderManager::SetColorChanged(int type, int num, bool high)
{
float *pf = &lastRGBAfull[type][num][0];
Expand Down
2 changes: 1 addition & 1 deletion Source/Core/VideoCommon/Src/PixelShaderManager.h
Expand Up @@ -21,7 +21,7 @@ class PixelShaderManager
static void Shutdown();
static void DoState(PointerWrap &p);

static void SetConstants(); // sets pixel shader constants
static void SetConstants(u32 components); // sets pixel shader constants

// constant management, should be called after memory is committed
static void SetColorChanged(int type, int index, bool high);
Expand Down
286 changes: 286 additions & 0 deletions Source/Core/VideoCommon/Src/ShaderGenCommon.h
@@ -0,0 +1,286 @@
// Copyright (C) 2003 Dolphin Project.

// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, version 2.0.

// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License 2.0 for more details.

// A copy of the GPL 2.0 should have been included with the program.
// If not, see http://www.gnu.org/licenses/

// Official SVN repository and contact information can be found at
// http://code.google.com/p/dolphin-emu/

#ifndef _SHADERGENCOMMON_H
#define _SHADERGENCOMMON_H

#include <stdio.h>
#include <stdarg.h>
#include <string>
#include <vector>
#include <algorithm>

#include "CommonTypes.h"
#include "VideoCommon.h"

/**
* Common interface for classes that need to go through the shader generation path (GenerateVertexShader, GeneratePixelShader)
* In particular, this includes the shader code generator (ShaderCode).
* A different class (ShaderUid) can be used to uniquely identify each ShaderCode object.
* More interesting things can be done with this, e.g. ShaderConstantProfile checks what shader constants are being used. This can be used to optimize buffer management.
* Each of the ShaderCode, ShaderUid and ShaderConstantProfile child classes only implement the subset of ShaderGeneratorInterface methods that are required for the specific tasks.
*/
class ShaderGeneratorInterface
{
public:
    /*
     * Called wherever the shader generator would emit a piece of shader code; takes
     * printf-style arguments.
     * The default implementation ignores its arguments. ShaderCode overrides this to
     * append the formatted text to its buffer.
     */
    void Write(const char* fmt, ...)
    {
    }

    /*
     * Read access to the internal buffer.
     * The default implementation has no buffer and returns NULL.
     * @note Implementations are expected to return the pointer given to the last SetBuffer() call.
     * @note Call SetBuffer() before relying on GetBuffer().
     */
    const char* GetBuffer()
    {
        return NULL;
    }

    /*
     * Hands the object a place to write to; meant to be called before Write().
     * The default implementation ignores the pointer.
     * @param buffer pointer to a char buffer that the object can write to
     */
    void SetBuffer(char* buffer)
    {
    }

    /*
     * Notification that the shader uses the constant range [first_index, last_index]
     * (last_index included). The default implementation does nothing.
     */
    void SetConstantsUsed(unsigned int first_index, unsigned int last_index)
    {
    }

    /*
     * Access to an internally stored object of the uid_data type.
     * @warning The default implementation has no such object and dereferences a NULL
     *          pointer. Do not touch the result unless the concrete class really stores
     *          uid data (e.g. guard the access with a dummy structure, cf. the
     *          vertex/pixel shader generators).
     */
    template<class uid_data>
    uid_data& GetUidData()
    {
        uid_data* const no_data = (uid_data*)NULL;
        return *no_data;
    }
};

/**
* Shader UID class used to uniquely identify the ShaderCode output written in the shader generator.
* uid_data can be any struct of parameters that uniquely identify each shader code output.
* Unless performance is not an issue, uid_data should be tightly packed to reduce memory footprint.
* Shader generators will write to specific uid_data fields; ShaderUid methods will only read raw u32 values from a union.
*/
template<class uid_data>
class ShaderUid : public ShaderGeneratorInterface
{
public:
    // Starts with every byte of the uid storage (including any padding) set to zero, so that
    // the raw comparisons below only see bits that were explicitly written afterwards.
    ShaderUid()
    {
        // TODO: Move to Shadergen => can be optimized out
        memset(values, 0, sizeof(values));
    }

    // Two uids are equal when their raw storage is bytewise identical.
    bool operator == (const ShaderUid& obj) const
    {
        return memcmp(this->values, obj.values, sizeof(values)) == 0;
    }

    bool operator != (const ShaderUid& obj) const
    {
        return memcmp(this->values, obj.values, sizeof(values)) != 0;
    }

    // determines the storage order inside STL containers
    // (lexicographic comparison of the raw u32 words)
    bool operator < (const ShaderUid& obj) const
    {
        // TODO: Store last frame used and order by that? makes much more sense anyway...
        for (unsigned int i = 0; i < NUM_VALUES; ++i)
        {
            if (this->values[i] < obj.values[i])
                return true;
            else if (this->values[i] > obj.values[i])
                return false;
        }
        return false;
    }

    // Mutable access for the shader generator; T is expected to be uid_data.
    template<class T>
    inline T& GetUidData() { return data; }

    const uid_data& GetUidData() const { return data; }

    // Size of the raw uid storage in bytes (not in u32 words).
    size_t GetUidDataSize() const { return sizeof(values); }

private:
    // Number of u32 words needed to cover uid_data, rounded UP. Rounding down would leave
    // the trailing bytes of a uid_data whose size is not a multiple of sizeof(u32) outside
    // of values[], so the comparisons above would silently ignore them.
    static const unsigned int NUM_VALUES = (sizeof(uid_data) + sizeof(u32) - 1) / sizeof(u32);

    union
    {
        uid_data data;
        u32 values[NUM_VALUES];
    };
};

/**
 * Shader generator target that appends formatted text to a caller-provided buffer.
 * The buffer is not owned and its size is unknown to this class: the caller must make
 * sure it is large enough for everything that is written.
 */
class ShaderCode : public ShaderGeneratorInterface
{
public:
    ShaderCode() : buf(NULL), write_ptr(NULL)
    {

    }

    /*
     * Appends printf-style formatted text at the current write position.
     * Does nothing when no buffer has been set via SetBuffer() yet.
     */
    void Write(const char* fmt, ...)
    {
        // Without a buffer there is nowhere to write to; vsprintf(NULL, ...) would crash.
        if (write_ptr == NULL)
            return;

        va_list arglist;
        va_start(arglist, fmt);
        const int written = vsprintf(write_ptr, fmt, arglist);
        va_end(arglist);

        // vsprintf reports errors with a negative value; never move the cursor backwards.
        if (written > 0)
            write_ptr += written;
    }

    // Returns the start of the buffer passed to the last SetBuffer() call (NULL if none).
    const char* GetBuffer() { return buf; }

    // Sets the destination buffer and rewinds the write position to its start.
    void SetBuffer(char* buffer) { buf = buffer; write_ptr = buffer; }

private:
    const char* buf;   // start of the output buffer, as handed to SetBuffer()
    char* write_ptr;   // position at which the next Write() appends
};

/**
* Generates a shader constant profile which can be used to query which constants are used in a shader
*/
class ShaderConstantProfile : public ShaderGeneratorInterface
{
public:
    // Creates a profile with num_constants entries, all initially marked as unused.
    ShaderConstantProfile(int num_constants) : constant_usage(num_constants)
    {
    }

    // Marks every constant in [first_index, last_index] (last_index included) as used.
    // No range check is performed against the number of constants given to the constructor.
    inline void SetConstantsUsed(unsigned int first_index, unsigned int last_index)
    {
        const unsigned int end_index = last_index+1;
        unsigned int i = first_index;
        while (i < end_index)
        {
            constant_usage[i] = true;
            ++i;
        }
    }

    // Currently reports every constant as used, regardless of what was recorded.
    inline bool ConstantIsUsed(unsigned int index)
    {
        // TODO: Not ready for usage yet
        //		return constant_usage[index];
        return true;
    }

private:
    std::vector<bool> constant_usage; // TODO: Is vector<bool> appropriate here?
};

// Appends a " : register(<prefix><num>)" binding to the declaration being written.
// Nothing is written when ApiType is API_OPENGL.
template<class T>
static void WriteRegister(T& object, API_TYPE ApiType, const char *prefix, const u32 num)
{
    if (ApiType != API_OPENGL)
        object.Write(" : register(%s%d)", prefix, num);
}

// Writes the "uniform " qualifier in front of a declaration, unless UBOs are in use.
// ApiType is currently not consulted.
template<class T>
static void WriteLocation(T& object, API_TYPE ApiType, bool using_ubos)
{
    if (!using_ubos)
        object.Write("uniform ");
}

// Writes one complete shader constant declaration to "object":
//   [uniform ]<type> <name> [ : register(c<num>)];\n
// The "uniform " prefix is left out when using_ubos is set (see WriteLocation)
// and the register binding is left out for API_OPENGL (see WriteRegister).
// The order of the calls below is the order of the emitted text.
template<class T>
static void DeclareUniform(T& object, API_TYPE api_type, bool using_ubos, const u32 num, const char* type, const char* name)
{
WriteLocation(object, api_type, using_ubos);
object.Write("%s %s ", type, name);
// Constants are always bound to the "c" register class.
WriteRegister(object, api_type, "c", num);
object.Write(";\n");
}

#pragma pack(4)
/**
 * Common uid data used for shader generators that use lighting calculations.
 * Expected to be stored as a member called "lighting".
 *
 * Every field packs one sub-field per lit channel (four channels). The
 * lighting shader generator ORs the per-channel value into the field, shifted
 * by the channel index times the sub-field width (e.g. attnfunc and
 * diffusefunc are shifted by 2*litchan_index).
 * The struct is part of a uid that is compared as raw memory, so the field
 * order and widths must not be changed casually.
 */
struct LightingUidData
{
u32 matsource : 4; // 4 channels x 1 bit
u32 enablelighting : 4; // 4 channels x 1 bit
u32 ambsource : 4; // 4 channels x 1 bit
u32 diffusefunc : 8; // 4 channels x 2 bits
u32 attnfunc : 8; // 4 channels x 2 bits
u32 light_mask : 32; // 4 channels x 8 bits (presumably one bit per light - confirm against the generator)
};
#pragma pack()

/**
* Checks if there has been
*/
template<class UidT, class CodeT>
class UidChecker
{
public:
void Invalidate()
{
m_shaders.clear();
m_uids.clear();
}

void AddToIndexAndCheck(CodeT& new_code, const UidT& new_uid, const char* shader_type, const char* dump_prefix)
{
bool uid_is_indexed = std::find(m_uids.begin(), m_uids.end(), new_uid) != m_uids.end();
if (!uid_is_indexed)
{
m_uids.push_back(new_uid);
m_shaders[new_uid] = new_code.GetBuffer();
}
else
{
// uid is already in the index => check if there's a shader with the same uid but different code
auto& old_code = m_shaders[new_uid];
if (strcmp(old_code.c_str(), new_code.GetBuffer()) != 0)
{
static int num_failures = 0;

char szTemp[MAX_PATH];
sprintf(szTemp, "%s%ssuid_mismatch_%04i.txt", File::GetUserPath(D_DUMP_IDX).c_str(),
dump_prefix,
++num_failures);

// TODO: Should also dump uids
std::ofstream file;
OpenFStream(file, szTemp, std::ios_base::out);
file << "Old shader code:\n" << old_code;
file << "\n\nNew shader code:\n" << new_code.GetBuffer();
file << "\n\nShader uid:\n";
for (unsigned int i = 0; i < new_uid.GetUidDataSize(); ++i)
{
u32 value = ((u32*)&new_uid.GetUidData())[i];
if ((i % 4) == 0)
{
unsigned int last_value = (i+3 < new_uid.GetUidDataSize()-1) ? i+3 : new_uid.GetUidDataSize();
file << std::setfill(' ') << std::dec;
file << "Values " << std::setw(2) << i << " - " << last_value << ": ";
}

file << std::setw(8) << std::setfill('0') << std::hex << value << std::setw(1);
if ((i % 4) < 3)
file << ' ';
else
file << std::endl;
}
file.close();

ERROR_LOG(VIDEO, "%s shader uid mismatch! See %s for details", shader_type, szTemp);
}
}
}

private:
std::map<UidT,std::string> m_shaders;
std::vector<UidT> m_uids;
};

#endif // _SHADERGENCOMMON_H
487 changes: 204 additions & 283 deletions Source/Core/VideoCommon/Src/VertexShaderGen.cpp

Large diffs are not rendered by default.

102 changes: 35 additions & 67 deletions Source/Core/VideoCommon/Src/VertexShaderGen.h
Expand Up @@ -5,8 +5,10 @@
#ifndef GCOGL_VERTEXSHADER_H
#define GCOGL_VERTEXSHADER_H

#include <stdarg.h>
#include "XFMemory.h"
#include "VideoCommon.h"
#include "ShaderGenCommon.h"

// TODO should be reordered
#define SHADER_POSITION_ATTRIB 0
Expand Down Expand Up @@ -48,7 +50,8 @@
#define C_NORMALMATRICES (C_TRANSFORMMATRICES + 64)
#define C_POSTTRANSFORMMATRICES (C_NORMALMATRICES + 32)
#define C_DEPTHPARAMS (C_POSTTRANSFORMMATRICES + 64)
#define C_VENVCONST_END (C_DEPTHPARAMS + 1)
#define C_VENVCONST_END (C_DEPTHPARAMS + 1)

const s_svar VSVar_Loc[] = { {I_POSNORMALMATRIX, C_POSNORMALMATRIX, 6 },
{I_PROJECTION , C_PROJECTION, 4 },
{I_MATERIALS, C_MATERIALS, 4 },
Expand All @@ -59,75 +62,40 @@ const s_svar VSVar_Loc[] = { {I_POSNORMALMATRIX, C_POSNORMALMATRIX, 6 },
{I_POSTTRANSFORMMATRICES, C_POSTTRANSFORMMATRICES, 64 },
{I_DEPTHPARAMS, C_DEPTHPARAMS, 1 },
};
// Vertex shader uid stored as a fixed array of u32 words.
// The "safe" variant always uses (and compares) NUM_VSUID_VALUES_SAFE words;
// the other variant holds 9 words and derives the number of words that are
// actually compared from a bitfield inside word 0.
template<bool safe>
class _VERTEXSHADERUID
{
#define NUM_VSUID_VALUES_SAFE 25
public:
	u32 values[safe ? NUM_VSUID_VALUES_SAFE : 9];

	// Leaves all words uninitialized.
	_VERTEXSHADERUID()
	{
	}

	// Copies every word of the array, regardless of GetNumValues().
	_VERTEXSHADERUID(const _VERTEXSHADERUID& r)
	{
		const size_t word_count = sizeof(values) / sizeof(u32);
		for (size_t idx = 0; idx != word_count; ++idx)
			values[idx] = r.values[idx];
	}

	// Number of leading words that take part in comparisons.
	// For the non-safe variant this is computed from bits 23..26 of word 0
	// (presumably numTexGens, going by the original note "numTexGens*3/4+1").
	int GetNumValues() const
	{
		if (!safe)
			return (((values[0] >> 23) & 0xf) * 3 + 3) / 4 + 3;

		return NUM_VSUID_VALUES_SAFE;
	}

	// Ordering for use as a std::map key: word 0 decides first, then the
	// remaining significant words in ascending index order.
	bool operator <(const _VERTEXSHADERUID& _Right) const
	{
		if (values[0] != _Right.values[0])
			return values[0] < _Right.values[0];

		// Word 0 is identical on both sides here, so both uids agree on the count.
		const int count = GetNumValues();
		for (int idx = 1; idx < count; ++idx)
		{
			if (values[idx] != _Right.values[idx])
				return values[idx] < _Right.values[idx];
		}

		return false;
	}

	// Equality over the significant words only.
	bool operator ==(const _VERTEXSHADERUID& _Right) const
	{
		if (values[0] != _Right.values[0])
			return false;

		const int count = GetNumValues();
		int idx = 1;
		while (idx < count && values[idx] == _Right.values[idx])
			++idx;

		// Reaching the end without a mismatch means the uids are equal.
		return idx >= count;
	}
};
typedef _VERTEXSHADERUID<false> VERTEXSHADERUID;
typedef _VERTEXSHADERUID<true> VERTEXSHADERUIDSAFE;

#pragma pack(4)

// components is included in the uid.
char* GenerateVSOutputStruct(char* p, u32 components, API_TYPE api_type);
const char *GenerateVertexShaderCode(u32 components, API_TYPE api_type);
// Payload of the vertex shader uid (wrapped by ShaderUid, see the
// VertexShaderUid typedef). It collects the pieces of state that select the
// generated vertex shader; the uid is compared as raw memory, so the field
// order and widths are part of the uid format.
struct vertex_shader_uid_data
{
// vertex components; they are included in the uid
u32 components;
u32 numColorChans : 2;
u32 numTexGens : 4;

// per texture coordinate generator settings (8 entries)
struct {
u32 projection : 1; // XF_TEXPROJ_X
u32 inputform : 2; // XF_TEXINPUT_X
u32 texgentype : 3; // XF_TEXGEN_X
u32 sourcerow : 5; // XF_SRCGEOM_X
u32 embosssourceshift : 3; // what generated texcoord to use
u32 embosslightshift : 3; // light index that is used
} texMtxInfo[8];
// per texture coordinate post (dual) transform settings (8 entries)
struct {
u32 index : 6; // base row of dual transform matrix
u32 normalize : 1; // normalize before send operation
} postMtxInfo[8];
struct {
u32 enabled : 1;
} dualTexTrans;

// lighting related uid data, filled in by the shared lighting shader generator
LightingUidData lighting;
};
#pragma pack()

void GetVertexShaderId(VERTEXSHADERUID *uid, u32 components);
void GetSafeVertexShaderId(VERTEXSHADERUIDSAFE *uid, u32 components);
typedef ShaderUid<vertex_shader_uid_data> VertexShaderUid;
typedef ShaderCode VertexShaderCode; // TODO: Obsolete..

// Used to make sure that our optimized vertex shader IDs don't lose any possible shader code changes
void ValidateVertexShaderIDs(API_TYPE api, VERTEXSHADERUIDSAFE old_id, const std::string& old_code, u32 components);
void GetVertexShaderUid(VertexShaderUid& object, u32 components, API_TYPE api_type);
void GenerateVertexShaderCode(VertexShaderCode& object, u32 components, API_TYPE api_type);
void GenerateVSOutputStructForGS(ShaderCode& object, u32 components, API_TYPE api_type);

#endif // GCOGL_VERTEXSHADER_H
1 change: 1 addition & 0 deletions Source/Core/VideoCommon/VideoCommon.vcxproj
Expand Up @@ -252,6 +252,7 @@
<ClInclude Include="Src\PixelShaderGen.h" />
<ClInclude Include="Src\PixelShaderManager.h" />
<ClInclude Include="Src\RenderBase.h" />
<ClInclude Include="Src\ShaderGenCommon.h" />
<ClInclude Include="Src\Statistics.h" />
<ClInclude Include="Src\TextureCacheBase.h" />
<ClInclude Include="Src\TextureConversionShader.h" />
Expand Down
5 changes: 4 additions & 1 deletion Source/Core/VideoCommon/VideoCommon.vcxproj.filters
Expand Up @@ -258,6 +258,9 @@
<ClInclude Include="Src\FPSCounter.h">
<Filter>Util</Filter>
</ClInclude>
<ClInclude Include="Src\ShaderGenCommon.h">
<Filter>Shader Generators</Filter>
</ClInclude>
</ItemGroup>
<ItemGroup>
<None Include="CMakeLists.txt" />
Expand Down Expand Up @@ -291,4 +294,4 @@
<UniqueIdentifier>{e2a527a2-ccc8-4ab8-a93e-dd2628c0f3b6}</UniqueIdentifier>
</Filter>
</ItemGroup>
</Project>
</Project>
13 changes: 7 additions & 6 deletions Source/Plugins/Plugin_VideoDX11/Src/LineGeometryShader.cpp
Expand Up @@ -169,11 +169,12 @@ bool LineGeometryShader::SetShader(u32 components, float lineWidth,
if (shaderIt == m_shaders.end())
{
// Generate new shader. Warning: not thread-safe.
static char code[16384];
char* p = code;
p = GenerateVSOutputStruct(p, components, API_D3D11);
p += sprintf(p, "\n%s", LINE_GS_COMMON);

static char buffer[16384];
ShaderCode code;
code.SetBuffer(buffer);
GenerateVSOutputStructForGS(code, components, API_D3D11);
code.Write("\n%s", LINE_GS_COMMON);

std::stringstream numTexCoordsStream;
numTexCoordsStream << xfregs.numTexGen.numTexGens;

Expand All @@ -185,7 +186,7 @@ bool LineGeometryShader::SetShader(u32 components, float lineWidth,
{ "NUM_TEXCOORDS", numTexCoordsStr.c_str() },
{ NULL, NULL }
};
ID3D11GeometryShader* newShader = D3D::CompileAndCreateGeometryShader(code, unsigned int(strlen(code)), macros);
ID3D11GeometryShader* newShader = D3D::CompileAndCreateGeometryShader(code.GetBuffer(), unsigned int(strlen(code.GetBuffer())), macros);
if (!newShader)
{
WARN_LOG(VIDEO, "Line geometry shader for components 0x%.08X failed to compile", components);
Expand Down
34 changes: 20 additions & 14 deletions Source/Plugins/Plugin_VideoDX11/Src/PixelShaderCache.cpp
Expand Up @@ -28,9 +28,10 @@ namespace DX11

PixelShaderCache::PSCache PixelShaderCache::PixelShaders;
const PixelShaderCache::PSCacheEntry* PixelShaderCache::last_entry;
PIXELSHADERUID PixelShaderCache::last_uid;
PixelShaderUid PixelShaderCache::last_uid;
UidChecker<PixelShaderUid,PixelShaderCode> PixelShaderCache::pixel_uid_checker;

LinearDiskCache<PIXELSHADERUID, u8> g_ps_disk_cache;
LinearDiskCache<PixelShaderUid, u8> g_ps_disk_cache;

ID3D11PixelShader* s_ColorMatrixProgram[2] = {NULL};
ID3D11PixelShader* s_ColorCopyProgram[2] = {NULL};
Expand Down Expand Up @@ -352,10 +353,10 @@ ID3D11Buffer* &PixelShaderCache::GetConstantBuffer()
}

// this class will load the precompiled shaders into our cache
class PixelShaderCacheInserter : public LinearDiskCacheReader<PIXELSHADERUID, u8>
class PixelShaderCacheInserter : public LinearDiskCacheReader<PixelShaderUid, u8>
{
public:
void Read(const PIXELSHADERUID &key, const u8 *value, u32 value_size)
void Read(const PixelShaderUid &key, const u8 *value, u32 value_size)
{
PixelShaderCache::InsertByteCode(key, value, value_size);
}
Expand Down Expand Up @@ -414,7 +415,8 @@ void PixelShaderCache::Clear()
{
for (PSCache::iterator iter = PixelShaders.begin(); iter != PixelShaders.end(); iter++)
iter->second.Destroy();
PixelShaders.clear();
PixelShaders.clear();
pixel_uid_checker.Invalidate();

last_entry = NULL;
}
Expand Down Expand Up @@ -450,16 +452,21 @@ void PixelShaderCache::Shutdown()

bool PixelShaderCache::SetShader(DSTALPHA_MODE dstAlphaMode, u32 components)
{
PIXELSHADERUID uid;
GetPixelShaderId(&uid, dstAlphaMode, components);
PixelShaderUid uid;
GetPixelShaderUid(uid, dstAlphaMode, API_D3D11, components);
if (g_ActiveConfig.bEnableShaderDebugging)
{
PixelShaderCode code;
GeneratePixelShaderCode(code, dstAlphaMode, API_D3D11, components);
pixel_uid_checker.AddToIndexAndCheck(code, uid, "Pixel", "p");
}

// Check if the shader is already set
if (last_entry)
{
if (uid == last_uid)
{
GFX_DEBUGGER_PAUSE_AT(NEXT_PIXEL_SHADER_CHANGE,true);
ValidatePixelShaderIDs(API_D3D11, last_entry->safe_uid, last_entry->code, dstAlphaMode, components);
return (last_entry->shader != NULL);
}
}
Expand All @@ -475,15 +482,15 @@ bool PixelShaderCache::SetShader(DSTALPHA_MODE dstAlphaMode, u32 components)
last_entry = &entry;

GFX_DEBUGGER_PAUSE_AT(NEXT_PIXEL_SHADER_CHANGE,true);
ValidatePixelShaderIDs(API_D3D11, entry.safe_uid, entry.code, dstAlphaMode, components);
return (entry.shader != NULL);
}

// Need to compile a new shader
const char* code = GeneratePixelShaderCode(dstAlphaMode, API_D3D11, components);
PixelShaderCode code;
GeneratePixelShaderCode(code, dstAlphaMode, API_D3D11, components);

D3DBlob* pbytecode;
if (!D3D::CompilePixelShader(code, (unsigned int)strlen(code), &pbytecode))
if (!D3D::CompilePixelShader(code.GetBuffer(), (unsigned int)strlen(code.GetBuffer()), &pbytecode))
{
GFX_DEBUGGER_PAUSE_AT(NEXT_ERROR, true);
return false;
Expand All @@ -497,15 +504,14 @@ bool PixelShaderCache::SetShader(DSTALPHA_MODE dstAlphaMode, u32 components)

if (g_ActiveConfig.bEnableShaderDebugging && success)
{
PixelShaders[uid].code = code;
GetSafePixelShaderId(&PixelShaders[uid].safe_uid, dstAlphaMode, components);
PixelShaders[uid].code = code.GetBuffer();
}

GFX_DEBUGGER_PAUSE_AT(NEXT_PIXEL_SHADER_CHANGE, true);
return success;
}

bool PixelShaderCache::InsertByteCode(const PIXELSHADERUID &uid, const void* bytecode, unsigned int bytecodelen)
bool PixelShaderCache::InsertByteCode(const PixelShaderUid &uid, const void* bytecode, unsigned int bytecodelen)
{
ID3D11PixelShader* shader = D3D::CreatePixelShaderFromByteCode(bytecode, bytecodelen);
if (shader == NULL)
Expand Down
9 changes: 5 additions & 4 deletions Source/Plugins/Plugin_VideoDX11/Src/PixelShaderCache.h
Expand Up @@ -22,7 +22,7 @@ class PixelShaderCache
static void Clear();
static void Shutdown();
static bool SetShader(DSTALPHA_MODE dstAlphaMode, u32 components); // TODO: Should be renamed to LoadShader
static bool InsertByteCode(const PIXELSHADERUID &uid, const void* bytecode, unsigned int bytecodelen);
static bool InsertByteCode(const PixelShaderUid &uid, const void* bytecode, unsigned int bytecodelen);

static ID3D11PixelShader* GetActiveShader() { return last_entry->shader; }
static ID3D11Buffer* &GetConstantBuffer();
Expand All @@ -41,18 +41,19 @@ class PixelShaderCache
{
ID3D11PixelShader* shader;

PIXELSHADERUIDSAFE safe_uid;
std::string code;

PSCacheEntry() : shader(NULL) {}
void Destroy() { SAFE_RELEASE(shader); }
};

typedef std::map<PIXELSHADERUID, PSCacheEntry> PSCache;
typedef std::map<PixelShaderUid, PSCacheEntry> PSCache;

static PSCache PixelShaders;
static const PSCacheEntry* last_entry;
static PIXELSHADERUID last_uid;
static PixelShaderUid last_uid;

static UidChecker<PixelShaderUid,PixelShaderCode> pixel_uid_checker;
};

} // namespace DX11
11 changes: 6 additions & 5 deletions Source/Plugins/Plugin_VideoDX11/Src/PointGeometryShader.cpp
Expand Up @@ -163,10 +163,11 @@ bool PointGeometryShader::SetShader(u32 components, float pointSize,
if (shaderIt == m_shaders.end())
{
// Generate new shader. Warning: not thread-safe.
static char code[16384];
char* p = code;
p = GenerateVSOutputStruct(p, components, API_D3D11);
p += sprintf(p, "\n%s", POINT_GS_COMMON);
static char buffer[16384];
ShaderCode code;
code.SetBuffer(buffer);
GenerateVSOutputStructForGS(code, components, API_D3D11);
code.Write("\n%s", POINT_GS_COMMON);

std::stringstream numTexCoordsStream;
numTexCoordsStream << xfregs.numTexGen.numTexGens;
Expand All @@ -179,7 +180,7 @@ bool PointGeometryShader::SetShader(u32 components, float pointSize,
{ "NUM_TEXCOORDS", numTexCoordsStr.c_str() },
{ NULL, NULL }
};
ID3D11GeometryShader* newShader = D3D::CompileAndCreateGeometryShader(code, unsigned int(strlen(code)), macros);
ID3D11GeometryShader* newShader = D3D::CompileAndCreateGeometryShader(code.GetBuffer(), unsigned int(strlen(code.GetBuffer())), macros);
if (!newShader)
{
WARN_LOG(VIDEO, "Point geometry shader for components 0x%.08X failed to compile", components);
Expand Down
2 changes: 1 addition & 1 deletion Source/Plugins/Plugin_VideoDX11/Src/VertexManager.cpp
Expand Up @@ -238,7 +238,7 @@ void VertexManager::vFlush()

// set global constants
VertexShaderManager::SetConstants();
PixelShaderManager::SetConstants();
PixelShaderManager::SetConstants(g_nativeVertexFmt->m_components);

bool useDstAlpha = !g_ActiveConfig.bDstAlphaPass && bpmem.dstalpha.enable && bpmem.blendmode.alphaupdate &&
bpmem.zcontrol.pixel_format == PIXELFMT_RGBA6_Z24;
Expand Down
33 changes: 20 additions & 13 deletions Source/Plugins/Plugin_VideoDX11/Src/VertexShaderCache.cpp
Expand Up @@ -24,14 +24,15 @@ namespace DX11 {

VertexShaderCache::VSCache VertexShaderCache::vshaders;
const VertexShaderCache::VSCacheEntry *VertexShaderCache::last_entry;
VERTEXSHADERUID VertexShaderCache::last_uid;
VertexShaderUid VertexShaderCache::last_uid;
UidChecker<VertexShaderUid,VertexShaderCode> VertexShaderCache::vertex_uid_checker;

static ID3D11VertexShader* SimpleVertexShader = NULL;
static ID3D11VertexShader* ClearVertexShader = NULL;
static ID3D11InputLayout* SimpleLayout = NULL;
static ID3D11InputLayout* ClearLayout = NULL;

LinearDiskCache<VERTEXSHADERUID, u8> g_vs_disk_cache;
LinearDiskCache<VertexShaderUid, u8> g_vs_disk_cache;

ID3D11VertexShader* VertexShaderCache::GetSimpleVertexShader() { return SimpleVertexShader; }
ID3D11VertexShader* VertexShaderCache::GetClearVertexShader() { return ClearVertexShader; }
Expand All @@ -57,10 +58,10 @@ ID3D11Buffer* &VertexShaderCache::GetConstantBuffer()
}

// this class will load the precompiled shaders into our cache
class VertexShaderCacheInserter : public LinearDiskCacheReader<VERTEXSHADERUID, u8>
class VertexShaderCacheInserter : public LinearDiskCacheReader<VertexShaderUid, u8>
{
public:
void Read(const VERTEXSHADERUID &key, const u8 *value, u32 value_size)
void Read(const VertexShaderUid &key, const u8 *value, u32 value_size)
{
D3DBlob* blob = new D3DBlob(value_size, value);
VertexShaderCache::InsertByteCode(key, blob);
Expand Down Expand Up @@ -176,6 +177,7 @@ void VertexShaderCache::Clear()
for (VSCache::iterator iter = vshaders.begin(); iter != vshaders.end(); ++iter)
iter->second.Destroy();
vshaders.clear();
vertex_uid_checker.Invalidate();

last_entry = NULL;
}
Expand All @@ -197,14 +199,20 @@ void VertexShaderCache::Shutdown()

bool VertexShaderCache::SetShader(u32 components)
{
VERTEXSHADERUID uid;
GetVertexShaderId(&uid, components);
VertexShaderUid uid;
GetVertexShaderUid(uid, components, API_D3D11);
if (g_ActiveConfig.bEnableShaderDebugging)
{
VertexShaderCode code;
GenerateVertexShaderCode(code, components, API_D3D11);
vertex_uid_checker.AddToIndexAndCheck(code, uid, "Vertex", "v");
}

if (last_entry)
{
if (uid == last_uid)
{
GFX_DEBUGGER_PAUSE_AT(NEXT_VERTEX_SHADER_CHANGE, true);
ValidateVertexShaderIDs(API_D3D11, last_entry->safe_uid, last_entry->code, components);
return (last_entry->shader != NULL);
}
}
Expand All @@ -218,14 +226,14 @@ bool VertexShaderCache::SetShader(u32 components)
last_entry = &entry;

GFX_DEBUGGER_PAUSE_AT(NEXT_VERTEX_SHADER_CHANGE, true);
ValidateVertexShaderIDs(API_D3D11, entry.safe_uid, entry.code, components);
return (entry.shader != NULL);
}

const char *code = GenerateVertexShaderCode(components, API_D3D11);
VertexShaderCode code;
GenerateVertexShaderCode(code, components, API_D3D11);

D3DBlob* pbytecode = NULL;
D3D::CompileVertexShader(code, (int)strlen(code), &pbytecode);
D3D::CompileVertexShader(code.GetBuffer(), (int)strlen(code.GetBuffer()), &pbytecode);

if (pbytecode == NULL)
{
Expand All @@ -239,15 +247,14 @@ bool VertexShaderCache::SetShader(u32 components)

if (g_ActiveConfig.bEnableShaderDebugging && success)
{
vshaders[uid].code = code;
GetSafeVertexShaderId(&vshaders[uid].safe_uid, components);
vshaders[uid].code = code.GetBuffer();
}

GFX_DEBUGGER_PAUSE_AT(NEXT_VERTEX_SHADER_CHANGE, true);
return success;
}

bool VertexShaderCache::InsertByteCode(const VERTEXSHADERUID &uid, D3DBlob* bcodeblob)
bool VertexShaderCache::InsertByteCode(const VertexShaderUid &uid, D3DBlob* bcodeblob)
{
ID3D11VertexShader* shader = D3D::CreateVertexShaderFromByteCode(bcodeblob);
if (shader == NULL)
Expand Down
9 changes: 5 additions & 4 deletions Source/Plugins/Plugin_VideoDX11/Src/VertexShaderCache.h
Expand Up @@ -31,15 +31,14 @@ class VertexShaderCache
static ID3D11InputLayout* GetSimpleInputLayout();
static ID3D11InputLayout* GetClearInputLayout();

static bool VertexShaderCache::InsertByteCode(const VERTEXSHADERUID &uid, D3DBlob* bcodeblob);
static bool VertexShaderCache::InsertByteCode(const VertexShaderUid &uid, D3DBlob* bcodeblob);

private:
struct VSCacheEntry
{
ID3D11VertexShader* shader;
D3DBlob* bytecode; // needed to initialize the input layout

VERTEXSHADERUIDSAFE safe_uid;
std::string code;

VSCacheEntry() : shader(NULL), bytecode(NULL) {}
Expand All @@ -55,11 +54,13 @@ class VertexShaderCache
SAFE_RELEASE(bytecode);
}
};
typedef std::map<VERTEXSHADERUID, VSCacheEntry> VSCache;
typedef std::map<VertexShaderUid, VSCacheEntry> VSCache;

static VSCache vshaders;
static const VSCacheEntry* last_entry;
static VERTEXSHADERUID last_uid;
static VertexShaderUid last_uid;

static UidChecker<VertexShaderUid,VertexShaderCode> vertex_uid_checker;
};

} // namespace DX11
Expand Down
38 changes: 22 additions & 16 deletions Source/Plugins/Plugin_VideoDX9/Src/PixelShaderCache.cpp
Expand Up @@ -31,9 +31,10 @@ namespace DX9

PixelShaderCache::PSCache PixelShaderCache::PixelShaders;
const PixelShaderCache::PSCacheEntry *PixelShaderCache::last_entry;
PIXELSHADERUID PixelShaderCache::last_uid;
PixelShaderUid PixelShaderCache::last_uid;
UidChecker<PixelShaderUid,PixelShaderCode> PixelShaderCache::pixel_uid_checker;

static LinearDiskCache<PIXELSHADERUID, u8> g_ps_disk_cache;
static LinearDiskCache<PixelShaderUid, u8> g_ps_disk_cache;
static std::set<u32> unique_shaders;

#define MAX_SSAA_SHADERS 3
Expand All @@ -55,10 +56,10 @@ static LPDIRECT3DPIXELSHADER9 s_ClearProgram = NULL;
static LPDIRECT3DPIXELSHADER9 s_rgba6_to_rgb8 = NULL;
static LPDIRECT3DPIXELSHADER9 s_rgb8_to_rgba6 = NULL;

class PixelShaderCacheInserter : public LinearDiskCacheReader<PIXELSHADERUID, u8>
class PixelShaderCacheInserter : public LinearDiskCacheReader<PixelShaderUid, u8>
{
public:
void Read(const PIXELSHADERUID &key, const u8 *value, u32 value_size)
void Read(const PixelShaderUid &key, const u8 *value, u32 value_size)
{
PixelShaderCache::InsertByteCode(key, value, value_size, false);
}
Expand Down Expand Up @@ -287,6 +288,7 @@ void PixelShaderCache::Clear()
for (PSCache::iterator iter = PixelShaders.begin(); iter != PixelShaders.end(); iter++)
iter->second.Destroy();
PixelShaders.clear();
pixel_uid_checker.Invalidate();

last_entry = NULL;
}
Expand Down Expand Up @@ -323,16 +325,21 @@ void PixelShaderCache::Shutdown()
bool PixelShaderCache::SetShader(DSTALPHA_MODE dstAlphaMode, u32 components)
{
const API_TYPE api = ((D3D::GetCaps().PixelShaderVersion >> 8) & 0xFF) < 3 ? API_D3D9_SM20 : API_D3D9_SM30;
PIXELSHADERUID uid;
GetPixelShaderId(&uid, dstAlphaMode, components);
PixelShaderUid uid;
GetPixelShaderUid(uid, dstAlphaMode, API_D3D9, components);
if (g_ActiveConfig.bEnableShaderDebugging)
{
PixelShaderCode code;
GeneratePixelShaderCode(code, dstAlphaMode, API_D3D9, components);
pixel_uid_checker.AddToIndexAndCheck(code, uid, "Pixel", "p");
}

// Check if the shader is already set
if (last_entry)
{
if (uid == last_uid)
{
GFX_DEBUGGER_PAUSE_AT(NEXT_PIXEL_SHADER_CHANGE, true);
ValidatePixelShaderIDs(api, last_entry->safe_uid, last_entry->code, dstAlphaMode, components);
return last_entry->shader != NULL;
}
}
Expand All @@ -349,34 +356,34 @@ bool PixelShaderCache::SetShader(DSTALPHA_MODE dstAlphaMode, u32 components)

if (entry.shader) D3D::SetPixelShader(entry.shader);
GFX_DEBUGGER_PAUSE_AT(NEXT_PIXEL_SHADER_CHANGE, true);
ValidatePixelShaderIDs(api, entry.safe_uid, entry.code, dstAlphaMode, components);
return (entry.shader != NULL);
}


// Need to compile a new shader
const char *code = GeneratePixelShaderCode(dstAlphaMode, api, components);
PixelShaderCode code;
GeneratePixelShaderCode(code, dstAlphaMode, api, components);

if (g_ActiveConfig.bEnableShaderDebugging)
{
u32 code_hash = HashAdler32((const u8 *)code, strlen(code));
u32 code_hash = HashAdler32((const u8 *)code.GetBuffer(), strlen(code.GetBuffer()));
unique_shaders.insert(code_hash);
SETSTAT(stats.numUniquePixelShaders, unique_shaders.size());
}

#if defined(_DEBUG) || defined(DEBUGFAST)
if (g_ActiveConfig.iLog & CONF_SAVESHADERS && code) {
if (g_ActiveConfig.iLog & CONF_SAVESHADERS) {
static int counter = 0;
char szTemp[MAX_PATH];
sprintf(szTemp, "%sps_%04i.txt", File::GetUserPath(D_DUMP_IDX).c_str(), counter++);

SaveData(szTemp, code);
SaveData(szTemp, code.GetBuffer());
}
#endif

u8 *bytecode = 0;
int bytecodelen = 0;
if (!D3D::CompilePixelShader(code, (int)strlen(code), &bytecode, &bytecodelen)) {
if (!D3D::CompilePixelShader(code.GetBuffer(), (int)strlen(code.GetBuffer()), &bytecode, &bytecodelen)) {
GFX_DEBUGGER_PAUSE_AT(NEXT_ERROR, true);
return false;
}
Expand All @@ -390,15 +397,14 @@ bool PixelShaderCache::SetShader(DSTALPHA_MODE dstAlphaMode, u32 components)

if (g_ActiveConfig.bEnableShaderDebugging && success)
{
PixelShaders[uid].code = code;
GetSafePixelShaderId(&PixelShaders[uid].safe_uid, dstAlphaMode, components);
PixelShaders[uid].code = code.GetBuffer();
}

GFX_DEBUGGER_PAUSE_AT(NEXT_PIXEL_SHADER_CHANGE, true);
return success;
}

bool PixelShaderCache::InsertByteCode(const PIXELSHADERUID &uid, const u8 *bytecode, int bytecodelen, bool activate)
bool PixelShaderCache::InsertByteCode(const PixelShaderUid &uid, const u8 *bytecode, int bytecodelen, bool activate)
{
LPDIRECT3DPIXELSHADER9 shader = D3D::CreatePixelShaderFromByteCode(bytecode, bytecodelen);

Expand Down
9 changes: 5 additions & 4 deletions Source/Plugins/Plugin_VideoDX9/Src/PixelShaderCache.h
Expand Up @@ -28,7 +28,6 @@ class PixelShaderCache
LPDIRECT3DPIXELSHADER9 shader;
bool owns_shader;

PIXELSHADERUIDSAFE safe_uid;
std::string code;

PSCacheEntry() : shader(NULL), owns_shader(true) {}
Expand All @@ -40,18 +39,20 @@ class PixelShaderCache
}
};

typedef std::map<PIXELSHADERUID, PSCacheEntry> PSCache;
typedef std::map<PixelShaderUid, PSCacheEntry> PSCache;

static PSCache PixelShaders;
static const PSCacheEntry *last_entry;
static PIXELSHADERUID last_uid;
static PixelShaderUid last_uid;
static UidChecker<PixelShaderUid,PixelShaderCode> pixel_uid_checker;

static void Clear();

public:
static void Init();
static void Shutdown();
static bool SetShader(DSTALPHA_MODE dstAlphaMode, u32 componets);
static bool InsertByteCode(const PIXELSHADERUID &uid, const u8 *bytecode, int bytecodelen, bool activate);
static bool InsertByteCode(const PixelShaderUid &uid, const u8 *bytecode, int bytecodelen, bool activate);
static LPDIRECT3DPIXELSHADER9 GetColorMatrixProgram(int SSAAMode);
static LPDIRECT3DPIXELSHADER9 GetColorCopyProgram(int SSAAMode);
static LPDIRECT3DPIXELSHADER9 GetDepthMatrixProgram(int SSAAMode, bool depthConversion);
Expand Down
2 changes: 1 addition & 1 deletion Source/Plugins/Plugin_VideoDX9/Src/VertexManager.cpp
Expand Up @@ -348,7 +348,7 @@ void VertexManager::vFlush()

// set global constants
VertexShaderManager::SetConstants();
PixelShaderManager::SetConstants();
PixelShaderManager::SetConstants(g_nativeVertexFmt->m_components);
u32 stride = g_nativeVertexFmt->GetVertexStride();
bool useDstAlpha = !g_ActiveConfig.bDstAlphaPass && bpmem.dstalpha.enable && bpmem.blendmode.alphaupdate &&
bpmem.zcontrol.pixel_format == PIXELFMT_RGBA6_Z24;
Expand Down
34 changes: 21 additions & 13 deletions Source/Plugins/Plugin_VideoDX9/Src/VertexShaderCache.cpp
Expand Up @@ -25,14 +25,15 @@ namespace DX9

VertexShaderCache::VSCache VertexShaderCache::vshaders;
const VertexShaderCache::VSCacheEntry *VertexShaderCache::last_entry;
VERTEXSHADERUID VertexShaderCache::last_uid;
VertexShaderUid VertexShaderCache::last_uid;
UidChecker<VertexShaderUid,VertexShaderCode> VertexShaderCache::vertex_uid_checker;

#define MAX_SSAA_SHADERS 3

static LPDIRECT3DVERTEXSHADER9 SimpleVertexShader[MAX_SSAA_SHADERS];
static LPDIRECT3DVERTEXSHADER9 ClearVertexShader;

LinearDiskCache<VERTEXSHADERUID, u8> g_vs_disk_cache;
LinearDiskCache<VertexShaderUid, u8> g_vs_disk_cache;

LPDIRECT3DVERTEXSHADER9 VertexShaderCache::GetSimpleVertexShader(int level)
{
Expand All @@ -45,10 +46,10 @@ LPDIRECT3DVERTEXSHADER9 VertexShaderCache::GetClearVertexShader()
}

// this class will load the precompiled shaders into our cache
class VertexShaderCacheInserter : public LinearDiskCacheReader<VERTEXSHADERUID, u8>
class VertexShaderCacheInserter : public LinearDiskCacheReader<VertexShaderUid, u8>
{
public:
void Read(const VERTEXSHADERUID &key, const u8 *value, u32 value_size)
void Read(const VertexShaderUid &key, const u8 *value, u32 value_size)
{
VertexShaderCache::InsertByteCode(key, value, value_size, false);
}
Expand Down Expand Up @@ -150,6 +151,7 @@ void VertexShaderCache::Clear()
for (VSCache::iterator iter = vshaders.begin(); iter != vshaders.end(); ++iter)
iter->second.Destroy();
vshaders.clear();
vertex_uid_checker.Invalidate();

last_entry = NULL;
}
Expand All @@ -174,14 +176,20 @@ void VertexShaderCache::Shutdown()

bool VertexShaderCache::SetShader(u32 components)
{
VERTEXSHADERUID uid;
GetVertexShaderId(&uid, components);
VertexShaderUid uid;
GetVertexShaderUid(uid, components, API_D3D9);
if (g_ActiveConfig.bEnableShaderDebugging)
{
VertexShaderCode code;
GenerateVertexShaderCode(code, components, API_D3D9);
vertex_uid_checker.AddToIndexAndCheck(code, uid, "Vertex", "v");
}

if (last_entry)
{
if (uid == last_uid)
{
GFX_DEBUGGER_PAUSE_AT(NEXT_VERTEX_SHADER_CHANGE, true);
ValidateVertexShaderIDs(API_D3D9, last_entry->safe_uid, last_entry->code, components);
return (last_entry->shader != NULL);
}
}
Expand All @@ -196,14 +204,15 @@ bool VertexShaderCache::SetShader(u32 components)

if (entry.shader) D3D::SetVertexShader(entry.shader);
GFX_DEBUGGER_PAUSE_AT(NEXT_VERTEX_SHADER_CHANGE, true);
ValidateVertexShaderIDs(API_D3D9, entry.safe_uid, entry.code, components);
return (entry.shader != NULL);
}

const char *code = GenerateVertexShaderCode(components, API_D3D9);
VertexShaderCode code;
GenerateVertexShaderCode(code, components, API_D3D9);

u8 *bytecode;
int bytecodelen;
if (!D3D::CompileVertexShader(code, (int)strlen(code), &bytecode, &bytecodelen))
if (!D3D::CompileVertexShader(code.GetBuffer(), (int)strlen(code.GetBuffer()), &bytecode, &bytecodelen))
{
GFX_DEBUGGER_PAUSE_AT(NEXT_ERROR, true);
return false;
Expand All @@ -213,15 +222,14 @@ bool VertexShaderCache::SetShader(u32 components)
bool success = InsertByteCode(uid, bytecode, bytecodelen, true);
if (g_ActiveConfig.bEnableShaderDebugging && success)
{
vshaders[uid].code = code;
GetSafeVertexShaderId(&vshaders[uid].safe_uid, components);
vshaders[uid].code = code.GetBuffer();
}
delete [] bytecode;
GFX_DEBUGGER_PAUSE_AT(NEXT_VERTEX_SHADER_CHANGE, true);
return success;
}

bool VertexShaderCache::InsertByteCode(const VERTEXSHADERUID &uid, const u8 *bytecode, int bytecodelen, bool activate) {
bool VertexShaderCache::InsertByteCode(const VertexShaderUid &uid, const u8 *bytecode, int bytecodelen, bool activate) {
LPDIRECT3DVERTEXSHADER9 shader = D3D::CreateVertexShaderFromByteCode(bytecode, bytecodelen);

// Make an entry in the table
Expand Down
10 changes: 6 additions & 4 deletions Source/Plugins/Plugin_VideoDX9/Src/VertexShaderCache.h
Expand Up @@ -23,7 +23,6 @@ class VertexShaderCache
LPDIRECT3DVERTEXSHADER9 shader;

std::string code;
VERTEXSHADERUIDSAFE safe_uid;

VSCacheEntry() : shader(NULL) {}
void Destroy()
Expand All @@ -34,11 +33,14 @@ class VertexShaderCache
}
};

typedef std::map<VERTEXSHADERUID, VSCacheEntry> VSCache;
typedef std::map<VertexShaderUid, VSCacheEntry> VSCache;

static VSCache vshaders;
static const VSCacheEntry *last_entry;
static VERTEXSHADERUID last_uid;
static VertexShaderUid last_uid;

static UidChecker<VertexShaderUid,VertexShaderCode> vertex_uid_checker;

static void Clear();

public:
Expand All @@ -47,7 +49,7 @@ class VertexShaderCache
static bool SetShader(u32 components);
static LPDIRECT3DVERTEXSHADER9 GetSimpleVertexShader(int level);
static LPDIRECT3DVERTEXSHADER9 GetClearVertexShader();
static bool InsertByteCode(const VERTEXSHADERUID &uid, const u8 *bytecode, int bytecodelen, bool activate);
static bool InsertByteCode(const VertexShaderUid &uid, const u8 *bytecode, int bytecodelen, bool activate);

static std::string GetCurrentShaderCode();
};
Expand Down
67 changes: 33 additions & 34 deletions Source/Plugins/Plugin_VideoOGL/Src/ProgramShaderCache.cpp
Expand Up @@ -31,6 +31,8 @@ static GLuint CurrentProgram = 0;
ProgramShaderCache::PCache ProgramShaderCache::pshaders;
ProgramShaderCache::PCacheEntry* ProgramShaderCache::last_entry;
SHADERUID ProgramShaderCache::last_uid;
UidChecker<PixelShaderUid,PixelShaderCode> ProgramShaderCache::pixel_uid_checker;
UidChecker<VertexShaderUid,VertexShaderCode> ProgramShaderCache::vertex_uid_checker;

static char s_glsl_header[1024] = "";

Expand Down Expand Up @@ -186,21 +188,20 @@ SHADER* ProgramShaderCache::SetShader ( DSTALPHA_MODE dstAlphaMode, u32 componen
{
SHADERUID uid;
GetShaderId(&uid, dstAlphaMode, components);

// Check if the shader is already set
if (last_entry)
{
if (uid == last_uid)
{
GFX_DEBUGGER_PAUSE_AT(NEXT_PIXEL_SHADER_CHANGE, true);
ValidateShaderIDs(last_entry, dstAlphaMode, components);
last_entry->shader.Bind();
return &last_entry->shader;
}
}

last_uid = uid;

// Check if shader is already in cache
PCache::iterator iter = pshaders.find(uid);
if (iter != pshaders.end())
Expand All @@ -209,38 +210,38 @@ SHADER* ProgramShaderCache::SetShader ( DSTALPHA_MODE dstAlphaMode, u32 componen
last_entry = entry;

GFX_DEBUGGER_PAUSE_AT(NEXT_PIXEL_SHADER_CHANGE, true);
ValidateShaderIDs(entry, dstAlphaMode, components);
last_entry->shader.Bind();
return &last_entry->shader;
}

// Make an entry in the table
PCacheEntry& newentry = pshaders[uid];
last_entry = &newentry;
newentry.in_cache = 0;

const char *vcode = GenerateVertexShaderCode(components, API_OPENGL);
const char *pcode = GeneratePixelShaderCode(dstAlphaMode, API_OPENGL, components);


VertexShaderCode vcode;
PixelShaderCode pcode;
GenerateVertexShaderCode(vcode, components, API_OPENGL);
GeneratePixelShaderCode(pcode, dstAlphaMode, API_OPENGL, components);

if (g_ActiveConfig.bEnableShaderDebugging)
{
GetSafeShaderId(&newentry.safe_uid, dstAlphaMode, components);
newentry.shader.strvprog = vcode;
newentry.shader.strpprog = pcode;
newentry.shader.strvprog = vcode.GetBuffer();
newentry.shader.strpprog = pcode.GetBuffer();
}

#if defined(_DEBUG) || defined(DEBUGFAST)
if (g_ActiveConfig.iLog & CONF_SAVESHADERS) {
static int counter = 0;
char szTemp[MAX_PATH];
sprintf(szTemp, "%svs_%04i.txt", File::GetUserPath(D_DUMP_IDX).c_str(), counter++);
SaveData(szTemp, vcode);
SaveData(szTemp, vcode.GetBuffer());
sprintf(szTemp, "%sps_%04i.txt", File::GetUserPath(D_DUMP_IDX).c_str(), counter++);
SaveData(szTemp, pcode);
SaveData(szTemp, pcode.GetBuffer());
}
#endif

if (!CompileShader(newentry.shader, vcode, pcode)) {
if (!CompileShader(newentry.shader, vcode.GetBuffer(), pcode.GetBuffer())) {
GFX_DEBUGGER_PAUSE_AT(NEXT_ERROR, true);
return NULL;
}
Expand All @@ -257,7 +258,7 @@ bool ProgramShaderCache::CompileShader ( SHADER& shader, const char* vcode, cons
{
GLuint vsid = CompileSingleShader(GL_VERTEX_SHADER, vcode);
GLuint psid = CompileSingleShader(GL_FRAGMENT_SHADER, pcode);

if(!vsid || !psid)
{
glDeleteShader(vsid);
Expand Down Expand Up @@ -380,28 +381,23 @@ GLuint ProgramShaderCache::CompileSingleShader (GLuint type, const char* code )
return result;
}



// Fills *uid with the pixel and vertex shader uids for the current state.
//   uid          - output; its puid and vuid members are written.
//   dstAlphaMode - forwarded to the pixel shader uid/code generators.
//   components   - vertex component flags forwarded to both generators.
void ProgramShaderCache::GetShaderId ( SHADERUID* uid, DSTALPHA_MODE dstAlphaMode, u32 components )
void ProgramShaderCache::GetShaderId(SHADERUID* uid, DSTALPHA_MODE dstAlphaMode, u32 components)
{
GetPixelShaderId(&uid->puid, dstAlphaMode, components);
GetVertexShaderId(&uid->vuid, components);
}
GetPixelShaderUid(uid->puid, dstAlphaMode, API_OPENGL, components);
GetVertexShaderUid(uid->vuid, components, API_OPENGL);

void ProgramShaderCache::GetSafeShaderId ( SHADERUIDSAFE* uid, DSTALPHA_MODE dstAlphaMode, u32 components )
{
GetSafePixelShaderId(&uid->puid, dstAlphaMode, components);
GetSafeVertexShaderId(&uid->vuid, components);
}
// Shader debugging only: regenerate the full pixel and vertex shader source
// and hand each one, together with the uid computed above, to its uid checker.
// NOTE(review): this generates shader code on every call while debugging is
// enabled; presumably AddToIndexAndCheck reports uids that map to differing
// code -- confirm against UidChecker.
if (g_ActiveConfig.bEnableShaderDebugging)
{
PixelShaderCode pcode;
GeneratePixelShaderCode(pcode, dstAlphaMode, API_OPENGL, components);
pixel_uid_checker.AddToIndexAndCheck(pcode, uid->puid, "Pixel", "p");

void ProgramShaderCache::ValidateShaderIDs ( PCacheEntry *entry, DSTALPHA_MODE dstAlphaMode, u32 components )
{
ValidateVertexShaderIDs(API_OPENGL, entry->safe_uid.vuid, entry->shader.strvprog, components);
ValidatePixelShaderIDs(API_OPENGL, entry->safe_uid.puid, entry->shader.strpprog, dstAlphaMode, components);
VertexShaderCode vcode;
GenerateVertexShaderCode(vcode, components, API_OPENGL);
vertex_uid_checker.AddToIndexAndCheck(vcode, uid->vuid, "Vertex", "v");
}
}



ProgramShaderCache::PCacheEntry ProgramShaderCache::GetShaderProgram(void)
{
return *last_entry;
Expand Down Expand Up @@ -497,6 +493,9 @@ void ProgramShaderCache::Shutdown(void)
iter->second.Destroy();
pshaders.clear();

pixel_uid_checker.Invalidate();
vertex_uid_checker.Invalidate();

if (g_ActiveConfig.backend_info.bSupportsGLSLUBO)
{
delete s_buffer;
Expand Down
27 changes: 12 additions & 15 deletions Source/Plugins/Plugin_VideoOGL/Src/ProgramShaderCache.h
Expand Up @@ -16,32 +16,29 @@
namespace OGL
{

// Composite cache key for a linked GL program: pairs a vertex shader uid with
// a pixel shader uid so the pair can serve as the key of the PCache std::map.
template<bool safe>
class _SHADERUID
class SHADERUID
{
public:
_VERTEXSHADERUID<safe> vuid;
_PIXELSHADERUID<safe> puid;
VertexShaderUid vuid;
PixelShaderUid puid;

_SHADERUID() {}
SHADERUID() {}

_SHADERUID(const _SHADERUID& r) : vuid(r.vuid), puid(r.puid) {}
SHADERUID(const SHADERUID& r) : vuid(r.vuid), puid(r.puid) {}

// Ordering used by the map: compare the pixel uids first, and consult the
// vertex uids only when neither pixel uid is less than the other.
bool operator <(const _SHADERUID& r) const
bool operator <(const SHADERUID& r) const
{
if(puid < r.puid) return true;
if(r.puid < puid) return false;
if(vuid < r.vuid) return true;
return false;
}

// Two keys are equal only when both component uids are equal.
bool operator ==(const _SHADERUID& r) const
bool operator ==(const SHADERUID& r) const
{
return puid == r.puid && vuid == r.vuid;
}
};
typedef _SHADERUID<false> SHADERUID;
typedef _SHADERUID<true> SHADERUIDSAFE;


const int NUM_UNIFORMS = 19;
Expand Down Expand Up @@ -72,7 +69,6 @@ class ProgramShaderCache
struct PCacheEntry
{
SHADER shader;
SHADERUIDSAFE safe_uid;
bool in_cache;

void Destroy()
Expand All @@ -81,12 +77,12 @@ class ProgramShaderCache
}
};

typedef std::map<SHADERUID, PCacheEntry> PCache;

static PCacheEntry GetShaderProgram(void);
static GLuint GetCurrentProgram(void);
static SHADER* SetShader(DSTALPHA_MODE dstAlphaMode, u32 components);
static void GetShaderId(SHADERUID *uid, DSTALPHA_MODE dstAlphaMode, u32 components);
static void GetSafeShaderId(SHADERUIDSAFE *uid, DSTALPHA_MODE dstAlphaMode, u32 components);
static void ValidateShaderIDs(PCacheEntry *entry, DSTALPHA_MODE dstAlphaMode, u32 components);

static bool CompileShader(SHADER &shader, const char* vcode, const char* pcode);
static GLuint CompileSingleShader(GLuint type, const char *code);
Expand All @@ -106,12 +102,13 @@ class ProgramShaderCache
void Read(const SHADERUID &key, const u8 *value, u32 value_size);
};

// Program cache container: maps a combined vertex+pixel SHADERUID to its entry.
typedef std::map<SHADERUID, PCacheEntry> PCache;

// The cache itself, plus the entry/uid selected by the previous SetShader()
// call (SetShader() checks last_uid before searching pshaders).
static PCache pshaders;
static PCacheEntry* last_entry;
static SHADERUID last_uid;

// Uid checkers fed by GetShaderId() when bEnableShaderDebugging is set.
static UidChecker<PixelShaderUid,PixelShaderCode> pixel_uid_checker;
static UidChecker<VertexShaderUid,VertexShaderCode> vertex_uid_checker;

static GLintptr s_vs_data_size;
static GLintptr s_ps_data_size;
static GLintptr s_vs_data_offset;
Expand Down
4 changes: 2 additions & 2 deletions Source/Plugins/Plugin_VideoOGL/Src/VertexManager.cpp
Expand Up @@ -257,7 +257,7 @@ void VertexManager::vFlush()

// set global constants
VertexShaderManager::SetConstants();
PixelShaderManager::SetConstants();
PixelShaderManager::SetConstants(g_nativeVertexFmt->m_components);
ProgramShaderCache::UploadConstants();

// setup the pointers
Expand All @@ -278,7 +278,7 @@ void VertexManager::vFlush()
{
// Need to set these again, if we don't support UBO
VertexShaderManager::SetConstants();
PixelShaderManager::SetConstants();
PixelShaderManager::SetConstants(g_nativeVertexFmt->m_components);
}

// only update alpha
Expand Down