/
ShaderGenCommon.h
322 lines (283 loc) · 12.7 KB
/
ShaderGenCommon.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
// Copyright 2012 Dolphin Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include <cstring>
#include <iterator>
#include <string>
#include <type_traits>
#include <vector>
#include <fmt/format.h>
#include "Common/BitField.h"
#include "Common/CommonTypes.h"
#include "Common/EnumMap.h"
#include "Common/StringUtil.h"
#include "Common/TypeUtils.h"
#include "VideoCommon/VideoCommon.h"
/**
* Common interface for classes that need to go through the shader generation path
* (GenerateVertexShader, GenerateGeometryShader, GeneratePixelShader)
* In particular, this includes the shader code generator (ShaderCode).
* A different class (ShaderUid) can be used to uniquely identify each ShaderCode object.
* More interesting things can be done with this, e.g. ShaderConstantProfile checks what shader
* constants are being used. This can be used to optimize buffer management.
* If the class does not use one or more of these methods (e.g. Uid class does not need code), the
* method will be defined as a no-op by the base class, and the call
* should be optimized out. The reason for this implementation is so that shader
* selection/generation can be done in two passes, with only a cache lookup being
* required if the shader has already been generated.
*/
class ShaderGeneratorInterface
{
public:
/*
* Used when the shader generator would write a piece of ShaderCode.
* Can be used like printf.
* @note In the ShaderCode implementation, this does indeed write the parameter string to an
* internal buffer. However, you're free to do whatever you like with the parameter.
*/
void Write(const char*, ...)
#ifdef __GNUC__
__attribute__((format(printf, 2, 3)))
#endif
{
}
/*
* Tells us that a specific constant range (including last_index) is being used by the shader
*/
void SetConstantsUsed(unsigned int first_index, unsigned int last_index) {}
};
/*
* Shader UID class used to uniquely identify the ShaderCode output written in the shader generator.
* uid_data can be any struct of parameters that uniquely identify each shader code output.
* Unless performance is not an issue, uid_data should be tightly packed to reduce memory footprint.
* Shader generators will write to specific uid_data fields; ShaderUid methods will only read raw
* u32 values from a union.
* NOTE: Because LinearDiskCache reads and writes the storage associated with a ShaderUid instance,
* ShaderUid must be trivially copyable.
*/
template <class uid_data>
class ShaderUid : public ShaderGeneratorInterface
{
public:
static_assert(std::is_trivially_copyable_v<uid_data>,
"uid_data must be a trivially copyable type");
bool operator==(const ShaderUid& obj) const
{
return memcmp(GetUidData(), obj.GetUidData(), GetUidDataSize()) == 0;
}
bool operator!=(const ShaderUid& obj) const { return !operator==(obj); }
// determines the storage order inside STL containers
bool operator<(const ShaderUid& obj) const
{
return memcmp(GetUidData(), obj.GetUidData(), GetUidDataSize()) < 0;
}
// Returns a pointer to an internally stored object of the uid_data type.
uid_data* GetUidData() { return &data; }
// Returns a pointer to an internally stored object of the uid_data type.
const uid_data* GetUidData() const { return &data; }
// Returns the raw bytes that make up the shader UID.
const u8* GetUidDataRaw() const { return reinterpret_cast<const u8*>(&data); }
// Returns the size of the underlying UID data structure in bytes.
size_t GetUidDataSize() const { return sizeof(data); }
private:
uid_data data{};
};
class ShaderCode : public ShaderGeneratorInterface
{
public:
ShaderCode() { m_buffer.reserve(16384); }
const std::string& GetBuffer() const { return m_buffer; }
// Writes format strings using fmtlib format strings.
template <typename... Args>
void Write(fmt::format_string<Args...> format, Args&&... args)
{
fmt::format_to(std::back_inserter(m_buffer), format, std::forward<Args>(args)...);
}
protected:
std::string m_buffer;
};
/**
* Generates a shader constant profile which can be used to query which constants are used in a
* shader
*/
class ShaderConstantProfile : public ShaderGeneratorInterface
{
public:
ShaderConstantProfile(int num_constants) { constant_usage.resize(num_constants); }
void SetConstantsUsed(unsigned int first_index, unsigned int last_index)
{
for (unsigned int i = first_index; i < last_index + 1; ++i)
constant_usage[i] = true;
}
bool ConstantIsUsed(unsigned int index) const
{
// TODO: Not ready for usage yet
return true;
// return constant_usage[index];
}
private:
std::vector<bool> constant_usage; // TODO: Is vector<bool> appropriate here?
};
// Host config contains the settings which can influence generated shaders.
union ShaderHostConfig
{
u32 bits;
BitField<0, 1, bool, u32> msaa;
BitField<1, 1, bool, u32> ssaa;
BitField<2, 1, bool, u32> stereo;
BitField<3, 1, bool, u32> wireframe;
BitField<4, 1, bool, u32> per_pixel_lighting;
BitField<5, 1, bool, u32> vertex_rounding;
BitField<6, 1, bool, u32> fast_depth_calc;
BitField<7, 1, bool, u32> bounding_box;
BitField<8, 1, bool, u32> backend_dual_source_blend;
BitField<9, 1, bool, u32> backend_geometry_shaders;
BitField<10, 1, bool, u32> backend_early_z;
BitField<11, 1, bool, u32> backend_bbox;
BitField<12, 1, bool, u32> backend_gs_instancing;
BitField<13, 1, bool, u32> backend_clip_control;
BitField<14, 1, bool, u32> backend_ssaa;
BitField<15, 1, bool, u32> backend_atomics;
BitField<16, 1, bool, u32> backend_depth_clamp;
BitField<17, 1, bool, u32> backend_reversed_depth_range;
BitField<18, 1, bool, u32> backend_bitfield;
BitField<19, 1, bool, u32> backend_dynamic_sampler_indexing;
BitField<20, 1, bool, u32> backend_shader_framebuffer_fetch;
BitField<21, 1, bool, u32> backend_logic_op;
BitField<22, 1, bool, u32> backend_palette_conversion;
BitField<23, 1, bool, u32> enable_validation_layer;
BitField<24, 1, bool, u32> manual_texture_sampling;
BitField<25, 1, bool, u32> manual_texture_sampling_custom_texture_sizes;
BitField<26, 1, bool, u32> backend_sampler_lod_bias;
static ShaderHostConfig GetCurrent();
};
// Gets the filename of the specified type of cache object (e.g. vertex shader, pipeline).
std::string GetDiskShaderCacheFileName(APIType api_type, const char* type, bool include_gameid,
bool include_host_config, bool include_api = true);
void WriteIsNanHeader(ShaderCode& out, APIType api_type);
void WriteBitfieldExtractHeader(ShaderCode& out, APIType api_type,
const ShaderHostConfig& host_config);
void GenerateVSOutputMembers(ShaderCode& object, APIType api_type, u32 texgens,
const ShaderHostConfig& host_config, std::string_view qualifier);
void AssignVSOutputMembers(ShaderCode& object, std::string_view a, std::string_view b, u32 texgens,
const ShaderHostConfig& host_config);
// We use the flag "centroid" to fix some MSAA rendering bugs. With MSAA, the
// pixel shader will be executed for each pixel which has at least one passed sample.
// So there may be rendered pixels where the center of the pixel isn't in the primitive.
// As the pixel shader usually renders at the center of the pixel, this position may be
// outside the primitive. This will lead to sampling outside the texture, sign changes, ...
// As a workaround, we interpolate at the centroid of the coveraged pixel, which
// is always inside the primitive.
// Without MSAA, this flag is defined to have no effect.
const char* GetInterpolationQualifier(bool msaa, bool ssaa, bool in_glsl_interface_block = false,
bool in = false);
// bitfieldExtract generator for BitField types
template <auto ptr_to_bitfield_member>
std::string BitfieldExtract(std::string_view source)
{
using BitFieldT = Common::MemberType<ptr_to_bitfield_member>;
return fmt::format("bitfieldExtract({}({}), {}, {})", BitFieldT::IsSigned() ? "int" : "uint",
source, static_cast<u32>(BitFieldT::StartBit()),
static_cast<u32>(BitFieldT::NumBits()));
}
template <auto last_member, typename = decltype(last_member)>
void WriteSwitch(ShaderCode& out, APIType ApiType, std::string_view variable,
const Common::EnumMap<std::string_view, last_member>& values, int indent,
bool break_)
{
const bool make_switch = (ApiType == APIType::D3D);
// The second template argument is needed to avoid compile errors from ambiguity with multiple
// enums with the same number of members in GCC prior to 8. See https://godbolt.org/z/xcKaW1seW
// and https://godbolt.org/z/hz7Yqq1P5
using enum_type = decltype(last_member);
// {:{}} is used to indent by formatting an empty string with a variable width
if (make_switch)
{
out.Write("{:{}}switch ({}) {{\n", "", indent, variable);
for (u32 i = 0; i <= static_cast<u32>(last_member); i++)
{
const enum_type key = static_cast<enum_type>(i);
// Assumes existence of an EnumFormatter
out.Write("{:{}}case {:s}:\n", "", indent, key);
// Note that this indentation behaves poorly for multi-line code
if (!values[key].empty())
out.Write("{:{}} {}\n", "", indent, values[key]);
if (break_)
out.Write("{:{}} break;\n", "", indent);
}
out.Write("{:{}}}}\n", "", indent);
}
else
{
// Generate a tree of if statements recursively
// std::function must be used because auto won't capture before initialization and thus can't be
// used recursively
std::function<void(u32, u32, u32)> BuildTree = [&](u32 cur_indent, u32 low, u32 high) {
// Each generated statement is for low <= x < high
if (high == low + 1)
{
// Down to 1 case (low <= x < low + 1 means x == low)
const enum_type key = static_cast<enum_type>(low);
// Note that this indentation behaves poorly for multi-line code
out.Write("{:{}}{} // {}\n", "", cur_indent, values[key], key);
}
else
{
u32 mid = low + ((high - low) / 2);
out.Write("{:{}}if ({} < {}u) {{\n", "", cur_indent, variable, mid);
BuildTree(cur_indent + 2, low, mid);
out.Write("{:{}}}} else {{\n", "", cur_indent);
BuildTree(cur_indent + 2, mid, high);
out.Write("{:{}}}}\n", "", cur_indent);
}
};
BuildTree(indent, 0, static_cast<u32>(last_member) + 1);
}
}
// Constant variable names
#define I_COLORS "color"
#define I_KCOLORS "k"
#define I_ALPHA "alphaRef"
#define I_TEXDIMS "texdim"
#define I_ZBIAS "czbias"
#define I_INDTEXSCALE "cindscale"
#define I_INDTEXMTX "cindmtx"
#define I_FOGCOLOR "cfogcolor"
#define I_FOGI "cfogi"
#define I_FOGF "cfogf"
#define I_FOGRANGE "cfogrange"
#define I_ZSLOPE "czslope"
#define I_EFBSCALE "cefbscale"
#define I_POSNORMALMATRIX "cpnmtx"
#define I_PROJECTION "cproj"
#define I_MATERIALS "cmtrl"
#define I_LIGHTS "clights"
#define I_TEXMATRICES "ctexmtx"
#define I_TRANSFORMMATRICES "ctrmtx"
#define I_NORMALMATRICES "cnmtx"
#define I_POSTTRANSFORMMATRICES "cpostmtx"
#define I_PIXELCENTERCORRECTION "cpixelcenter"
#define I_VIEWPORT_SIZE "cviewport"
#define I_STEREOPARAMS "cstereo"
#define I_LINEPTPARAMS "clinept"
#define I_TEXOFFSET "ctexoffset"
static const char s_shader_uniforms[] = "\tuint components;\n"
"\tuint xfmem_dualTexInfo;\n"
"\tuint xfmem_numColorChans;\n"
"\tuint missing_color_hex;\n"
"\tfloat4 missing_color_value;\n"
"\tfloat4 " I_POSNORMALMATRIX "[6];\n"
"\tfloat4 " I_PROJECTION "[4];\n"
"\tint4 " I_MATERIALS "[4];\n"
"\tLight " I_LIGHTS "[8];\n"
"\tfloat4 " I_TEXMATRICES "[24];\n"
"\tfloat4 " I_TRANSFORMMATRICES "[64];\n"
"\tfloat4 " I_NORMALMATRICES "[32];\n"
"\tfloat4 " I_POSTTRANSFORMMATRICES "[64];\n"
"\tfloat4 " I_PIXELCENTERCORRECTION ";\n"
"\tfloat2 " I_VIEWPORT_SIZE ";\n"
"\tuint4 xfmem_pack1[8];\n"
"\t#define xfmem_texMtxInfo(i) (xfmem_pack1[(i)].x)\n"
"\t#define xfmem_postMtxInfo(i) (xfmem_pack1[(i)].y)\n"
"\t#define xfmem_color(i) (xfmem_pack1[(i)].z)\n"
"\t#define xfmem_alpha(i) (xfmem_pack1[(i)].w)\n";