Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Accurately handle the copy filter and gamma for EFB and XFB copies #10466

Merged
merged 11 commits into from
Jul 17, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
4 changes: 2 additions & 2 deletions Source/Core/Core/FifoPlayer/FifoPlayer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -591,9 +591,9 @@ void FifoPlayer::ClearEfb()
UPE_Copy copy = bpmem.triggerEFBCopy;
copy.clamp_top = false;
copy.clamp_bottom = false;
copy.yuv = false;
copy.unknown_bit = false;
copy.target_pixel_format = static_cast<u32>(EFBCopyFormat::RGBA8) << 1;
copy.gamma = 0;
copy.gamma = GammaCorrection::Gamma1_0;
copy.half_scale = false;
copy.scale_invert = false;
copy.clear = true;
Expand Down
4 changes: 2 additions & 2 deletions Source/Core/VideoBackends/Null/TextureCache.h
Original file line number Diff line number Diff line change
Expand Up @@ -14,15 +14,15 @@ class TextureCache final : public TextureCacheBase
u32 bytes_per_row, u32 num_blocks_y, u32 memory_stride,
const MathUtil::Rectangle<int>& src_rect, bool scale_by_half, bool linear_filter,
float y_scale, float gamma, bool clamp_top, bool clamp_bottom,
const EFBCopyFilterCoefficients& filter_coefficients) override
const std::array<u32, 3>& filter_coefficients) override
{
}

void CopyEFBToCacheEntry(TCacheEntry* entry, bool is_depth_copy,
const MathUtil::Rectangle<int>& src_rect, bool scale_by_half,
bool linear_filter, EFBCopyFormat dst_format, bool is_intensity,
float gamma, bool clamp_top, bool clamp_bottom,
const EFBCopyFilterCoefficients& filter_coefficients) override
const std::array<u32, 3>& filter_coefficients) override
{
}
};
Expand Down
11 changes: 8 additions & 3 deletions Source/Core/VideoBackends/Software/EfbInterface.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -535,9 +535,14 @@ static yuv444 ConvertColorToYUV(u32 color)

// GameCube/Wii uses the BT.601 standard algorithm for converting to YCbCr; see
// http://www.equasys.de/colorconversion.html#YCbCr-RGBColorFormatConversion
return {static_cast<u8>(0.257f * red + 0.504f * green + 0.098f * blue),
static_cast<s8>(-0.148f * red + -0.291f * green + 0.439f * blue),
static_cast<s8>(0.439f * red + -0.368f * green + -0.071f * blue)};
// These numbers were determined by hardware testing
const u16 y = +66 * red + 129 * green + +25 * blue;
const s16 u = -38 * red + -74 * green + 112 * blue;
const s16 v = 112 * red + -94 * green + -18 * blue;
const u8 y_round = static_cast<u8>((y >> 8) + ((y >> 7) & 1));
const s8 u_round = static_cast<s8>((u >> 8) + ((u >> 7) & 1));
const s8 v_round = static_cast<s8>((v >> 8) + ((v >> 7) & 1));
return {y_round, u_round, v_round};
}

u32 GetDepth(u16 x, u16 y)
Expand Down
4 changes: 2 additions & 2 deletions Source/Core/VideoBackends/Software/TextureCache.h
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ class TextureCache : public TextureCacheBase
u32 bytes_per_row, u32 num_blocks_y, u32 memory_stride,
const MathUtil::Rectangle<int>& src_rect, bool scale_by_half, bool linear_filter,
float y_scale, float gamma, bool clamp_top, bool clamp_bottom,
const EFBCopyFilterCoefficients& filter_coefficients) override
const std::array<u32, 3>& filter_coefficients) override
{
TextureEncoder::Encode(dst, params, native_width, bytes_per_row, num_blocks_y, memory_stride,
src_rect, scale_by_half, y_scale, gamma);
Expand All @@ -23,7 +23,7 @@ class TextureCache : public TextureCacheBase
const MathUtil::Rectangle<int>& src_rect, bool scale_by_half,
bool linear_filter, EFBCopyFormat dst_format, bool is_intensity,
float gamma, bool clamp_top, bool clamp_bottom,
const EFBCopyFilterCoefficients& filter_coefficients) override
const std::array<u32, 3>& filter_coefficients) override
{
// TODO: If we ever want to "fake" vram textures, we would need to implement this
}
Expand Down
42 changes: 22 additions & 20 deletions Source/Core/VideoCommon/BPMemory.h
Original file line number Diff line number Diff line change
Expand Up @@ -2035,17 +2035,30 @@ struct fmt::formatter<FrameToField> : EnumFormatter<FrameToField::InterlacedOdd>
constexpr formatter() : EnumFormatter(names) {}
};

enum class GammaCorrection : u32
{
Gamma1_0 = 0,
Gamma1_7 = 1,
Gamma2_2 = 2,
// Hardware testing indicates this behaves the same as Gamma2_2
Invalid2_2 = 3,
};
template <>
struct fmt::formatter<GammaCorrection> : EnumFormatter<GammaCorrection::Invalid2_2>
{
constexpr formatter() : EnumFormatter({"1.0", "1.7", "2.2", "Invalid 2.2"}) {}
};

union UPE_Copy
{
u32 Hex;

BitField<0, 1, bool, u32> clamp_top; // if set clamp top
BitField<1, 1, bool, u32> clamp_bottom; // if set clamp bottom
BitField<2, 1, bool, u32> yuv; // if set, color conversion from RGB to YUV
BitField<0, 1, bool, u32> clamp_top; // if set clamp top
BitField<1, 1, bool, u32> clamp_bottom; // if set clamp bottom
BitField<2, 1, u32> unknown_bit;
BitField<3, 4, u32> target_pixel_format; // realformat is (fmt/2)+((fmt&1)*8).... for some reason
// the msb is the lsb (pattern: cycling right shift)
// gamma correction.. 0 = 1.0 ; 1 = 1.7 ; 2 = 2.2 ; 3 is reserved
BitField<7, 2, u32> gamma;
BitField<7, 2, GammaCorrection> gamma;
// "mipmap" filter... false = no filter (scale 1:1) ; true = box filter (scale 2:1)
BitField<9, 1, bool, u32> half_scale;
BitField<10, 1, bool, u32> scale_invert; // if set vertical scaling is on
Expand Down Expand Up @@ -2084,23 +2097,10 @@ struct fmt::formatter<UPE_Copy>
else
clamp = "None";
}
std::string_view gamma = "Invalid";
switch (copy.gamma)
{
case 0:
gamma = "1.0";
break;
case 1:
gamma = "1.7";
break;
case 2:
gamma = "2.2";
break;
}

return fmt::format_to(ctx.out(),
"Clamping: {}\n"
"Converting from RGB to YUV: {}\n"
"Unknown bit: {}\n"
"Target pixel format: {}\n"
"Gamma correction: {}\n"
"Half scale: {}\n"
Expand All @@ -2110,7 +2110,7 @@ struct fmt::formatter<UPE_Copy>
"Copy to XFB: {}\n"
"Intensity format: {}\n"
"Automatic color conversion: {}",
clamp, no_yes[copy.yuv], copy.tp_realFormat(), gamma,
clamp, copy.unknown_bit, copy.tp_realFormat(), copy.gamma,
no_yes[copy.half_scale], no_yes[copy.scale_invert], no_yes[copy.clear],
copy.frame_to_field, no_yes[copy.copy_to_xfb], no_yes[copy.intensity_fmt],
no_yes[copy.auto_conv]);
Expand All @@ -2123,10 +2123,12 @@ union CopyFilterCoefficients

u64 Hex;

BitField<0, 32, u32, u64> Low;
BitField<0, 6, u64> w0;
BitField<6, 6, u64> w1;
BitField<12, 6, u64> w2;
BitField<18, 6, u64> w3;
BitField<32, 32, u32, u64> High;
BitField<32, 6, u64> w4;
BitField<38, 6, u64> w5;
BitField<44, 6, u64> w6;
Expand Down
10 changes: 6 additions & 4 deletions Source/Core/VideoCommon/BPStructs.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
#include <fmt/format.h>

#include "Common/CommonTypes.h"
#include "Common/EnumMap.h"
#include "Common/Logging/Log.h"

#include "Core/ConfigManager.h"
Expand Down Expand Up @@ -42,7 +43,8 @@

using namespace BPFunctions;

static const float s_gammaLUT[] = {1.0f, 1.7f, 2.2f, 1.0f};
static constexpr Common::EnumMap<float, GammaCorrection::Invalid2_2> s_gammaLUT = {1.0f, 1.7f, 2.2f,
2.2f};

void BPInit()
{
Expand Down Expand Up @@ -276,9 +278,9 @@ static void BPWritten(const BPCmd& bp, int cycles_into_future)
bool is_depth_copy = bpmem.zcontrol.pixel_format == PixelFormat::Z24;
g_texture_cache->CopyRenderTargetToTexture(
destAddr, PE_copy.tp_realFormat(), copy_width, copy_height, destStride, is_depth_copy,
srcRect, PE_copy.intensity_fmt, PE_copy.half_scale, 1.0f, 1.0f,
bpmem.triggerEFBCopy.clamp_top, bpmem.triggerEFBCopy.clamp_bottom,
bpmem.copyfilter.GetCoefficients());
srcRect, PE_copy.intensity_fmt && PE_copy.auto_conv, PE_copy.half_scale, 1.0f,
s_gammaLUT[PE_copy.gamma], bpmem.triggerEFBCopy.clamp_top,
bpmem.triggerEFBCopy.clamp_bottom, bpmem.copyfilter.GetCoefficients());
}
else
{
Expand Down
4 changes: 2 additions & 2 deletions Source/Core/VideoCommon/FramebufferShaderGen.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -550,7 +550,7 @@ std::string GenerateTextureReinterpretShader(TextureFormat from_format, TextureF
break;

default:
WARN_LOG_FMT(VIDEO, "From format {} is not supported", static_cast<u32>(from_format));
WARN_LOG_FMT(VIDEO, "From format {} is not supported", from_format);
return "{}\n";
}

Expand Down Expand Up @@ -602,7 +602,7 @@ std::string GenerateTextureReinterpretShader(TextureFormat from_format, TextureF
}
break;
default:
WARN_LOG_FMT(VIDEO, "To format {} is not supported", static_cast<u32>(to_format));
WARN_LOG_FMT(VIDEO, "To format {} is not supported", to_format);
return "{}\n";
}

Expand Down
73 changes: 36 additions & 37 deletions Source/Core/VideoCommon/TextureCacheBase.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -276,8 +276,7 @@ TextureCacheBase::ApplyPaletteToEntry(TCacheEntry* entry, const u8* palette, TLU
const AbstractPipeline* pipeline = g_shader_cache->GetPaletteConversionPipeline(tlutfmt);
if (!pipeline)
{
ERROR_LOG_FMT(VIDEO, "Failed to get conversion pipeline for format {:#04X}",
static_cast<u32>(tlutfmt));
ERROR_LOG_FMT(VIDEO, "Failed to get conversion pipeline for format {}", tlutfmt);
return nullptr;
}

Expand Down Expand Up @@ -345,9 +344,8 @@ TextureCacheBase::TCacheEntry* TextureCacheBase::ReinterpretEntry(const TCacheEn
g_shader_cache->GetTextureReinterpretPipeline(existing_entry->format.texfmt, new_format);
if (!pipeline)
{
ERROR_LOG_FMT(VIDEO,
"Failed to obtain texture reinterpreting pipeline from format {:#04X} to {:#04X}",
static_cast<u32>(existing_entry->format.texfmt), static_cast<u32>(new_format));
ERROR_LOG_FMT(VIDEO, "Failed to obtain texture reinterpreting pipeline from format {} to {}",
existing_entry->format.texfmt, new_format);
return nullptr;
}

Expand Down Expand Up @@ -1980,44 +1978,49 @@ void TextureCacheBase::StitchXFBCopy(TCacheEntry* stitched_entry)
}
}

EFBCopyFilterCoefficients
std::array<u32, 3>
TextureCacheBase::GetRAMCopyFilterCoefficients(const CopyFilterCoefficients::Values& coefficients)
{
// To simplify the backend, we precalculate the three coefficients in common. Coefficients 0, 1
// are for the row above, 2, 3, 4 are for the current pixel, and 5, 6 are for the row below.
return EFBCopyFilterCoefficients{
static_cast<float>(static_cast<u32>(coefficients[0]) + static_cast<u32>(coefficients[1])) /
64.0f,
static_cast<float>(static_cast<u32>(coefficients[2]) + static_cast<u32>(coefficients[3]) +
static_cast<u32>(coefficients[4])) /
64.0f,
static_cast<float>(static_cast<u32>(coefficients[5]) + static_cast<u32>(coefficients[6])) /
64.0f,
return {
static_cast<u32>(coefficients[0]) + static_cast<u32>(coefficients[1]),
static_cast<u32>(coefficients[2]) + static_cast<u32>(coefficients[3]) +
static_cast<u32>(coefficients[4]),
static_cast<u32>(coefficients[5]) + static_cast<u32>(coefficients[6]),
};
}

EFBCopyFilterCoefficients
std::array<u32, 3>
TextureCacheBase::GetVRAMCopyFilterCoefficients(const CopyFilterCoefficients::Values& coefficients)
{
// If the user disables the copy filter, only apply it to the VRAM copy.
// This way games which are sensitive to changes to the RAM copy of the XFB will be unaffected.
EFBCopyFilterCoefficients res = GetRAMCopyFilterCoefficients(coefficients);
std::array<u32, 3> res = GetRAMCopyFilterCoefficients(coefficients);
if (!g_ActiveConfig.bDisableCopyFilter)
return res;

// Disabling the copy filter in options should not ignore the values the game sets completely,
// as some games use the filter coefficients to control the brightness of the screen. Instead,
// add all coefficients to the middle sample, so the deflicker/vertical filter has no effect.
res.middle = res.upper + res.middle + res.lower;
res.upper = 0.0f;
res.lower = 0.0f;
res[1] = res[0] + res[1] + res[2];
res[0] = 0;
res[2] = 0;
return res;
}

bool TextureCacheBase::NeedsCopyFilterInShader(const EFBCopyFilterCoefficients& coefficients)
bool TextureCacheBase::AllCopyFilterCoefsNeeded(const std::array<u32, 3>& coefficients)
{
// If the top/bottom coefficients are zero, no point sampling/blending from these rows.
return coefficients.upper != 0 || coefficients.lower != 0;
return coefficients[0] != 0 || coefficients[2] != 0;
}

bool TextureCacheBase::CopyFilterCanOverflow(const std::array<u32, 3>& coefficients)
{
// Normally, the copy filter coefficients will sum to at most 64. If the sum is higher than that,
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

So I recall this overflow behavior but I forget the details I know games can go higher than 64 to induce brightening of the overall image. Can you give any more details - how does the clamping break?

And are there any known occurrences of a game using 128 or higher?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If the value for a color channel is 512 or higher, it ends up wrapping around to 0 again (so presumably when the clamping to [0, 255] is applied, they use a field that can store [0, 511]). I don't know of any situation where this behavior would be useful, but it's something I encountered in my hardware test, so I implemented it. (It's conditionally enabled so that there's no performance cost for situations where it's impossible for the value to reach 512.)

(I actually don't know of any games that go above 64 to produce brightening, only of ones that go below 64 to produce darkening - they probably exist, though.)

// colors are clamped to the range [0, 255], but if the sum is higher than 128, that clamping
// breaks (as colors end up >= 512, which wraps back to 0).
return coefficients[0] + coefficients[1] + coefficients[2] >= 128;
}

void TextureCacheBase::CopyRenderTargetToTexture(
Expand Down Expand Up @@ -2257,10 +2260,11 @@ void TextureCacheBase::CopyRenderTargetToTexture(

if (copy_to_ram)
{
EFBCopyFilterCoefficients coefficients = GetRAMCopyFilterCoefficients(filter_coefficients);
const std::array<u32, 3> coefficients = GetRAMCopyFilterCoefficients(filter_coefficients);
PixelFormat srcFormat = bpmem.zcontrol.pixel_format;
EFBCopyParams format(srcFormat, dstFormat, is_depth_copy, isIntensity,
NeedsCopyFilterInShader(coefficients));
AllCopyFilterCoefsNeeded(coefficients),
CopyFilterCanOverflow(coefficients), gamma != 1.0);

std::unique_ptr<AbstractStagingTexture> staging_texture = GetEFBCopyStagingTexture();
if (staging_texture)
Expand Down Expand Up @@ -2718,16 +2722,15 @@ void TextureCacheBase::CopyEFBToCacheEntry(TCacheEntry* entry, bool is_depth_cop
bool scale_by_half, bool linear_filter,
EFBCopyFormat dst_format, bool is_intensity, float gamma,
bool clamp_top, bool clamp_bottom,
const EFBCopyFilterCoefficients& filter_coefficients)
const std::array<u32, 3>& filter_coefficients)
{
// Flush EFB pokes first, as they're expected to be included.
g_framebuffer_manager->FlushEFBPokes();

// Get the pipeline which we will be using. If the compilation failed, this will be null.
const AbstractPipeline* copy_pipeline =
g_shader_cache->GetEFBCopyToVRAMPipeline(TextureConversionShaderGen::GetShaderUid(
dst_format, is_depth_copy, is_intensity, scale_by_half,
NeedsCopyFilterInShader(filter_coefficients)));
const AbstractPipeline* copy_pipeline = g_shader_cache->GetEFBCopyToVRAMPipeline(
TextureConversionShaderGen::GetShaderUid(dst_format, is_depth_copy, is_intensity,
scale_by_half, 1.0f / gamma, filter_coefficients));
if (!copy_pipeline)
{
WARN_LOG_FMT(VIDEO, "Skipping EFB copy to VRAM due to missing pipeline.");
Expand All @@ -2748,7 +2751,7 @@ void TextureCacheBase::CopyEFBToCacheEntry(TCacheEntry* entry, bool is_depth_cop
struct Uniforms
{
float src_left, src_top, src_width, src_height;
float filter_coefficients[3];
std::array<u32, 3> filter_coefficients;
float gamma_rcp;
float clamp_top;
float clamp_bottom;
Expand All @@ -2763,9 +2766,7 @@ void TextureCacheBase::CopyEFBToCacheEntry(TCacheEntry* entry, bool is_depth_cop
uniforms.src_top = framebuffer_rect.top * rcp_efb_height;
uniforms.src_width = framebuffer_rect.GetWidth() * rcp_efb_width;
uniforms.src_height = framebuffer_rect.GetHeight() * rcp_efb_height;
uniforms.filter_coefficients[0] = filter_coefficients.upper;
uniforms.filter_coefficients[1] = filter_coefficients.middle;
uniforms.filter_coefficients[2] = filter_coefficients.lower;
uniforms.filter_coefficients = filter_coefficients;
uniforms.gamma_rcp = 1.0f / gamma;
// NOTE: when the clamp bits aren't set, the hardware will happily read beyond the EFB,
// which returns random garbage from the empty bus (confirmed by hardware tests).
Expand Down Expand Up @@ -2797,7 +2798,7 @@ void TextureCacheBase::CopyEFB(AbstractStagingTexture* dst, const EFBCopyParams&
u32 memory_stride, const MathUtil::Rectangle<int>& src_rect,
bool scale_by_half, bool linear_filter, float y_scale, float gamma,
bool clamp_top, bool clamp_bottom,
const EFBCopyFilterCoefficients& filter_coefficients)
const std::array<u32, 3>& filter_coefficients)
{
// Flush EFB pokes first, as they're expected to be included.
g_framebuffer_manager->FlushEFBPokes();
Expand Down Expand Up @@ -2828,7 +2829,7 @@ void TextureCacheBase::CopyEFB(AbstractStagingTexture* dst, const EFBCopyParams&
float gamma_rcp;
float clamp_top;
float clamp_bottom;
float filter_coefficients[3];
std::array<u32, 3> filter_coefficients;
u32 padding;
};
Uniforms encoder_params;
Expand All @@ -2849,9 +2850,7 @@ void TextureCacheBase::CopyEFB(AbstractStagingTexture* dst, const EFBCopyParams&
encoder_params.clamp_top = (static_cast<float>(top_coord) + .5f) * rcp_efb_height;
const u32 bottom_coord = (clamp_bottom ? framebuffer_rect.bottom : efb_height) - 1;
encoder_params.clamp_bottom = (static_cast<float>(bottom_coord) + .5f) * rcp_efb_height;
encoder_params.filter_coefficients[0] = filter_coefficients.upper;
encoder_params.filter_coefficients[1] = filter_coefficients.middle;
encoder_params.filter_coefficients[2] = filter_coefficients.lower;
encoder_params.filter_coefficients = filter_coefficients;
g_vertex_manager->UploadUtilityUniforms(&encoder_params, sizeof(encoder_params));

// Because the shader uses gl_FragCoord and we read it back, we must render to the lower-left.
Expand Down