@@ -55,6 +55,9 @@ ShaderCode GenPixelShader(APIType api_type, const ShaderHostConfig& host_config,
const bool stereo = host_config.stereo ;
const bool use_dual_source = host_config.backend_dual_source_blend ;
const bool use_shader_blend = !use_dual_source && host_config.backend_shader_framebuffer_fetch ;
const bool use_shader_logic_op =
!host_config.backend_logic_op && host_config.backend_shader_framebuffer_fetch ;
const bool use_framebuffer_fetch = use_shader_blend || use_shader_logic_op;
const bool early_depth = uid_data->early_depth != 0 ;
const bool per_pixel_depth = uid_data->per_pixel_depth != 0 ;
const bool bounding_box = host_config.bounding_box ;
@@ -71,37 +74,51 @@ ShaderCode GenPixelShader(APIType api_type, const ShaderHostConfig& host_config,
// Shader inputs/outputs in GLSL (HLSL is in main).
if (api_type == APIType::OpenGL || api_type == APIType::Vulkan)
{
if (use_dual_source)
#ifdef __APPLE__
// Framebuffer fetch is only supported by Metal, so ensure that we're running Vulkan (MoltenVK)
// if we want to use it.
if (api_type == APIType::Vulkan)
{
if (DriverDetails::HasBug (DriverDetails::BUG_BROKEN_FRAGMENT_SHADER_INDEX_DECORATION))
{
out.Write (" FRAGMENT_OUTPUT_LOCATION(0) out vec4 ocol0;\n "
" FRAGMENT_OUTPUT_LOCATION(1) out vec4 ocol1;\n " );
}
else
if (use_dual_source)
{
out.Write (" FRAGMENT_OUTPUT_LOCATION_INDEXED(0, 0) out vec4 ocol0;\n "
" FRAGMENT_OUTPUT_LOCATION_INDEXED(0, 1) out vec4 ocol1;\n " );
}
}
else if (use_shader_blend)
{
// QComm's Adreno driver doesn't seem to like using the framebuffer_fetch value as an
// intermediate value with multiple reads & modifications, so pull out the "real" output value
// and use a temporary for calculations, then set the output value once at the end of the
// shader
if (DriverDetails::HasBug (DriverDetails::BUG_BROKEN_FRAGMENT_SHADER_INDEX_DECORATION))
else if (use_shader_blend)
{
out.Write (" FRAGMENT_OUTPUT_LOCATION(0) FRAGMENT_INOUT vec4 real_ocol0;\n " );
// Metal doesn't support a single unified variable for both input and output, so we declare
// the output separately. The input will be defined later below.
out.Write (" FRAGMENT_OUTPUT_LOCATION(0) out vec4 real_ocol0;\n " );
}
else
{
out.Write (" FRAGMENT_OUTPUT_LOCATION_INDEXED(0, 0) FRAGMENT_INOUT vec4 real_ocol0;\n " );
out.Write (" FRAGMENT_OUTPUT_LOCATION(0) out vec4 ocol0;\n " );
}
if (use_framebuffer_fetch)
{
// Subpass inputs will be converted to framebuffer fetch by SPIRV-Cross.
out.Write (" INPUT_ATTACHMENT_BINDING(0, 0, 0) uniform subpassInput in_ocol0;\n " );
}
}
else
#endif
{
out.Write (" FRAGMENT_OUTPUT_LOCATION(0) out vec4 ocol0;\n " );
bool has_broken_decoration =
DriverDetails::HasBug (DriverDetails::BUG_BROKEN_FRAGMENT_SHADER_INDEX_DECORATION);
out.Write (" {} {} vec4 {};\n " ,
has_broken_decoration ? " FRAGMENT_OUTPUT_LOCATION(0)" :
" FRAGMENT_OUTPUT_LOCATION_INDEXED(0, 0)" ,
use_framebuffer_fetch ? " FRAGMENT_INOUT" : " out" ,
use_shader_blend ? " real_ocol0" : " ocol0" );
if (use_dual_source)
{
out.Write (" {} out vec4 ocol1;\n " , has_broken_decoration ?
" FRAGMENT_OUTPUT_LOCATION(1)" :
" FRAGMENT_OUTPUT_LOCATION_INDEXED(0, 1)" );
}
}
if (per_pixel_depth)
@@ -511,11 +528,28 @@ ShaderCode GenPixelShader(APIType api_type, const ShaderHostConfig& host_config,
out.Write (" void main()\n {{\n " );
out.Write (" float4 rawpos = gl_FragCoord;\n " );
if (use_framebuffer_fetch)
{
// Store off a copy of the initial framebuffer value.
//
// If FB_FETCH_VALUE isn't defined (i.e. no special keyword for fetching from the
// framebuffer), we read from real_ocol0 or ocol0, depending if shader blending is enabled.
out.Write (" #ifdef FB_FETCH_VALUE\n "
" float4 initial_ocol0 = FB_FETCH_VALUE;\n "
" #else\n "
" float4 initial_ocol0 = {};\n "
" #endif\n " ,
use_shader_blend ? " real_ocol0" : " ocol0" );
}
if (use_shader_blend)
{
// Store off a copy of the initial fb value for blending
out.Write (" float4 initial_ocol0 = FB_FETCH_VALUE;\n "
" float4 ocol0;\n "
// QComm's Adreno driver doesn't seem to like using the framebuffer_fetch value as an
// intermediate value with multiple reads & modifications, so we pull out the "real" output
// value above and use a temporary for calculations, then set the output value once at the
// end of the shader if we are using shader blending.
out.Write (" float4 ocol0;\n "
" float4 ocol1;\n " );
}
}
@@ -1075,6 +1109,40 @@ ShaderCode GenPixelShader(APIType api_type, const ShaderHostConfig& host_config,
" }}\n "
" \n " );
if (use_shader_logic_op)
{
static constexpr std::array<const char *, 16 > logic_op_mode{
" int4(0, 0, 0, 0)" , // CLEAR
" TevResult & fb_value" , // AND
" TevResult & ~fb_value" , // AND_REVERSE
" TevResult" , // COPY
" ~TevResult & fb_value" , // AND_INVERTED
" fb_value" , // NOOP
" TevResult ^ fb_value" , // XOR
" TevResult | fb_value" , // OR
" ~(TevResult | fb_value)" , // NOR
" ~(TevResult ^ fb_value)" , // EQUIV
" ~fb_value" , // INVERT
" TevResult | ~fb_value" , // OR_REVERSE
" ~TevResult" , // COPY_INVERTED
" ~TevResult | fb_value" , // OR_INVERTED
" ~(TevResult & fb_value)" , // NAND
" int4(255, 255, 255, 255)" , // SET
};
out.Write (" // Logic Ops\n "
" if (logic_op_enable) {{\n "
" int4 fb_value = iround(initial_ocol0 * 255.0);"
" switch (logic_op_mode) {{\n " );
for (size_t i = 0 ; i < logic_op_mode.size (); i++)
{
out.Write (" case {}u: TevResult = {}; break;\n " , i, logic_op_mode[i]);
}
out.Write (" }}\n "
" }}\n " );
}
// D3D requires that the shader outputs be uint when writing to a uint render target for logic op.
if (api_type == APIType::D3D && uid_data->uint_output )
{