Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

EFB2RAM: Apply copy filter as a float coefficient after sampling #6936

Merged
merged 1 commit into from May 22, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
EFB2RAM: Apply copy filter as a float coefficient after sampling
Using 8-bit integer math here led to precision loss for depth copies,
which broke various effects in games, e.g. lens flare in MK:DD.

It's unlikely the console implements this as a floating-point multiply
(fixed-point perhaps), but since we have the float round trip in our
EFB2RAM shaders anyway, it's not going to make things any worse. If we
do rewrite our shaders to use integer math completely, then it might be
worth switching this conversion back to integers.

In that case, however, the range of the values (format) should be known, or
we should expand all values out to 24 bits first.
  • Loading branch information
stenzek committed May 22, 2018
commit f74dbc794c124e6f1c5d5f60ff1d6fe65acc379f
4 changes: 2 additions & 2 deletions Source/Core/VideoBackends/D3D/PSTextureEncoder.cpp
Expand Up @@ -34,7 +34,7 @@ struct EFBEncodeParams
float gamma_rcp;
float clamp_top;
float clamp_bottom;
s32 filter_coefficients[3];
float filter_coefficients[3];
u32 padding;
};

Expand Down Expand Up @@ -169,4 +169,4 @@ ID3D11PixelShader* PSTextureEncoder::GetEncodingPixelShader(const EFBCopyParams&
m_encoding_shaders.emplace(params, newShader);
return newShader;
}
}
} // namespace DX11
4 changes: 2 additions & 2 deletions Source/Core/VideoBackends/D3D/TextureCache.cpp
Expand Up @@ -276,7 +276,7 @@ void TextureCache::CopyEFBToCacheEntry(TCacheEntry* entry, bool is_depth_copy,
};
PixelConstants constants;
for (size_t i = 0; i < filter_coefficients.size(); i++)
constants.filter_coefficients[i] = filter_coefficients[i] / 64.0f;
constants.filter_coefficients[i] = filter_coefficients[i];
constants.gamma_rcp = 1.0f / gamma;
constants.clamp_top = clamp_top ? src_rect.top / float(EFB_HEIGHT) : 0.0f;
constants.clamp_bottom = clamp_bottom ? src_rect.bottom / float(EFB_HEIGHT) : 1.0f;
Expand Down Expand Up @@ -315,4 +315,4 @@ TextureCache::GetEFBToTexPixelShader(const TextureConversionShaderGen::TCShaderU
m_efb_to_tex_pixel_shaders.emplace(uid, shader);
return shader;
}
}
} // namespace DX11
6 changes: 3 additions & 3 deletions Source/Core/VideoBackends/OGL/TextureCache.cpp
Expand Up @@ -558,12 +558,12 @@ void TextureCache::CopyEFBToCacheEntry(TCacheEntry* entry, bool is_depth_copy,
glUniform2f(shader.clamp_tb_uniform,
clamp_bottom ? (1.0f - src_rect.bottom / static_cast<float>(EFB_HEIGHT)) : 0.0f,
clamp_top ? (1.0f - src_rect.top / static_cast<float>(EFB_HEIGHT)) : 1.0f);
glUniform3f(shader.filter_coefficients_uniform, filter_coefficients[0] / 64.0f,
filter_coefficients[1] / 64.0f, filter_coefficients[2] / 64.0f);
glUniform3f(shader.filter_coefficients_uniform, filter_coefficients[0], filter_coefficients[1],
filter_coefficients[2]);

ProgramShaderCache::BindVertexFormat(nullptr);
glDrawArrays(GL_TRIANGLE_STRIP, 0, 4);

g_renderer->RestoreAPIState();
}
}
} // namespace OGL
6 changes: 3 additions & 3 deletions Source/Core/VideoBackends/OGL/TextureConverter.cpp
Expand Up @@ -50,7 +50,7 @@ std::unique_ptr<AbstractStagingTexture> s_encoding_readback_texture;

const int renderBufferWidth = EFB_WIDTH * 4;
const int renderBufferHeight = 1024;
}
} // namespace

static EncodingProgram& GetOrCreateEncodingShader(const EFBCopyParams& params)
{
Expand Down Expand Up @@ -158,7 +158,7 @@ void EncodeToRamFromTexture(u8* dest_ptr, const EFBCopyParams& params, u32 nativ
glUniform1f(texconv_shader.y_scale_uniform, y_scale);
glUniform1f(texconv_shader.gamma_rcp_uniform, 1.0f / gamma);
glUniform2f(texconv_shader.clamp_tb_uniform, clamp_top, clamp_bottom);
glUniform3i(texconv_shader.filter_coefficients_uniform, filter_coefficients[0],
glUniform3f(texconv_shader.filter_coefficients_uniform, filter_coefficients[0],
filter_coefficients[1], filter_coefficients[2]);

const GLuint read_texture = params.depth ?
Expand All @@ -171,6 +171,6 @@ void EncodeToRamFromTexture(u8* dest_ptr, const EFBCopyParams& params, u32 nativ
g_renderer->RestoreAPIState();
}

} // namespace
} // namespace TextureConverter

} // namespace OGL
2 changes: 1 addition & 1 deletion Source/Core/VideoBackends/Vulkan/TextureCache.cpp
Expand Up @@ -244,7 +244,7 @@ void TextureCache::CopyEFBToCacheEntry(TCacheEntry* entry, bool is_depth_copy,
};
PixelUniforms uniforms;
for (size_t i = 0; i < filter_coefficients.size(); i++)
uniforms.filter_coefficients[i] = filter_coefficients[i] / 64.0f;
uniforms.filter_coefficients[i] = filter_coefficients[i];
uniforms.gamma_rcp = 1.0f / gamma;
uniforms.clamp_top = clamp_top ? src_rect.top / float(EFB_HEIGHT) : 0.0f;
uniforms.clamp_bottom = clamp_bottom ? src_rect.bottom / float(EFB_HEIGHT) : 1.0f;
Expand Down
4 changes: 2 additions & 2 deletions Source/Core/VideoBackends/Vulkan/TextureConverter.cpp
Expand Up @@ -41,10 +41,10 @@ struct EFBEncodeParams
float gamma_rcp;
float clamp_top;
float clamp_bottom;
s32 filter_coefficients[3];
float filter_coefficients[3];
u32 padding;
};
}
} // namespace
TextureConverter::TextureConverter()
{
}
Expand Down
12 changes: 8 additions & 4 deletions Source/Core/VideoCommon/TextureCacheBase.cpp
Expand Up @@ -1506,10 +1506,14 @@ TextureCacheBase::CopyFilterCoefficientArray TextureCacheBase::GetRAMCopyFilterC
{
// To simplify the backend, we precalculate the three coefficients in common. Coefficients 0, 1
// are for the row above, 2, 3, 4 are for the current pixel, and 5, 6 are for the row below.
return {static_cast<u32>(coefficients[0]) + static_cast<u32>(coefficients[1]),
static_cast<u32>(coefficients[2]) + static_cast<u32>(coefficients[3]) +
static_cast<u32>(coefficients[4]),
static_cast<u32>(coefficients[5]) + static_cast<u32>(coefficients[6])};
return {
static_cast<float>(static_cast<u32>(coefficients[0]) + static_cast<u32>(coefficients[1])) /
64.0f,
static_cast<float>(static_cast<u32>(coefficients[2]) + static_cast<u32>(coefficients[3]) +
static_cast<u32>(coefficients[4])) /
64.0f,
static_cast<float>(static_cast<u32>(coefficients[5]) + static_cast<u32>(coefficients[6])) /
64.0f};
}

TextureCacheBase::CopyFilterCoefficientArray TextureCacheBase::GetVRAMCopyFilterCoefficients(
Expand Down
2 changes: 1 addition & 1 deletion Source/Core/VideoCommon/TextureCacheBase.h
Expand Up @@ -109,7 +109,7 @@ class TextureCacheBase

public:
// Reduced version of the full coefficient array, collapsed to a single value for each row.
using CopyFilterCoefficientArray = std::array<u32, 3>;
using CopyFilterCoefficientArray = std::array<float, 3>;

struct TCacheEntry
{
Expand Down
23 changes: 10 additions & 13 deletions Source/Core/VideoCommon/TextureConversionShader.cpp
Expand Up @@ -67,7 +67,7 @@ static void WriteHeader(char*& p, APIType ApiType)
WRITE(p, "uniform float y_scale;\n");
WRITE(p, "uniform float gamma_rcp;\n");
WRITE(p, "uniform float2 clamp_tb;\n");
WRITE(p, "uniform int3 filter_coefficients;\n");
WRITE(p, "uniform float3 filter_coefficients;\n");
WRITE(p, "#define samp0 samp9\n");
WRITE(p, "SAMPLER_BINDING(9) uniform sampler2DArray samp0;\n");
WRITE(p, "FRAGMENT_OUTPUT_LOCATION(0) out vec4 ocol0;\n");
Expand All @@ -79,7 +79,7 @@ static void WriteHeader(char*& p, APIType ApiType)
WRITE(p, " float y_scale;\n");
WRITE(p, " float gamma_rcp;\n");
WRITE(p, " float2 clamp_tb;\n");
WRITE(p, " int3 filter_coefficients;\n");
WRITE(p, " float3 filter_coefficients;\n");
WRITE(p, "};\n");
WRITE(p, "SAMPLER_BINDING(0) uniform sampler2DArray samp0;\n");
WRITE(p, "FRAGMENT_OUTPUT_LOCATION(0) out vec4 ocol0;\n");
Expand All @@ -91,7 +91,7 @@ static void WriteHeader(char*& p, APIType ApiType)
WRITE(p, " float y_scale;\n");
WRITE(p, " float gamma_rcp;\n");
WRITE(p, " float2 clamp_tb;\n");
WRITE(p, " int3 filter_coefficients;\n");
WRITE(p, " float3 filter_coefficients;\n");
WRITE(p, "};\n");
WRITE(p, "sampler samp0 : register(s0);\n");
WRITE(p, "Texture2DArray Tex0 : register(t0);\n");
Expand Down Expand Up @@ -191,21 +191,18 @@ static void WriteSampleFunction(char*& p, const EFBCopyParams& params, APIType A
WRITE(p, " float4 next_row = ");
WriteSampleOp(1);
WRITE(p, ";\n");
WRITE(
p,
" float3 col = float3(clamp((int3(prev_row.rgb * 255.0) * filter_coefficients[0] +\n"
" int3(current_row.rgb * 255.0) * filter_coefficients[1] +\n"
" int3(next_row.rgb * 255.0) * filter_coefficients[2]) >> 6,\n"
" int3(0, 0, 0), int3(255, 255, 255))) / 255.0;\n");
WRITE(p, " return float4(col, current_row.a);\n");
WRITE(p, " return float4(min(prev_row.rgb * filter_coefficients[0] +\n"
" current_row.rgb * filter_coefficients[1] +\n"
" next_row.rgb * filter_coefficients[2], \n"
" float3(1, 1, 1)), current_row.a);\n");
}
else
{
WRITE(p, " float4 current_row = ");
WriteSampleOp(0);
WRITE(p, ";\n");
WRITE(p, " return float4(clamp(int3(current_row.rgb * 255.0) * filter_coefficients[1], "
"int3(0, 0, 0), int3(255, 255, 255)), current_row.a);\n");
WRITE(p, "return float4(min(current_row.rgb * filter_coefficients[1], float3(1, 1, 1)),\n"
" current_row.a);\n");
}
WRITE(p, "}\n");
}
Expand Down Expand Up @@ -1422,4 +1419,4 @@ std::string GenerateDecodingShader(TextureFormat format, TLUTFormat palette_form
return ss.str();
}

} // namespace
} // namespace TextureConversionShaderTiled
10 changes: 6 additions & 4 deletions Source/Core/VideoCommon/TextureConverterShaderGen.cpp
Expand Up @@ -97,15 +97,17 @@ ShaderCode GenerateShader(APIType api_type, const UidData* uid_data)
out.Write(" float4 prev_row = SampleEFB(uv0, -1.0f);\n"
" float4 current_row = SampleEFB(uv0, 0.0f);\n"
" float4 next_row = SampleEFB(uv0, 1.0f);\n"
" float4 texcol = float4(prev_row.rgb * filter_coefficients[0] +\n"
" current_row.rgb * filter_coefficients[1] +\n"
" next_row.rgb * filter_coefficients[2], current_row.a);\n");
" float4 texcol = float4(min(prev_row.rgb * filter_coefficients[0] +\n"
" current_row.rgb * filter_coefficients[1] +\n"
" next_row.rgb * filter_coefficients[2], \n"
" float3(1, 1, 1)), current_row.a);\n");
}
else
{
out.Write(
" float4 current_row = SampleEFB(uv0, 0.0f);\n"
" float4 texcol = float4(current_row.rgb * filter_coefficients[1], current_row.a);\n");
" float4 texcol = float4(min(current_row.rgb * filter_coefficients[1], float3(1, 1, 1)),\n"
" current_row.a);\n");
}

if (uid_data->is_depth_copy)
Expand Down