Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
Merge pull request #10466 from Pokechu22/efb-copy-gamma
Accurately handle the copy filter and gamma for EFB and XFB copies
  • Loading branch information
JMC47 committed Jul 17, 2022
2 parents 2ef069e + a6e06f3 commit f1d23ff
Show file tree
Hide file tree
Showing 15 changed files with 396 additions and 794 deletions.
4 changes: 2 additions & 2 deletions Source/Core/Core/FifoPlayer/FifoPlayer.cpp
Expand Up @@ -591,9 +591,9 @@ void FifoPlayer::ClearEfb()
UPE_Copy copy = bpmem.triggerEFBCopy;
copy.clamp_top = false;
copy.clamp_bottom = false;
copy.yuv = false;
copy.unknown_bit = false;
copy.target_pixel_format = static_cast<u32>(EFBCopyFormat::RGBA8) << 1;
copy.gamma = 0;
copy.gamma = GammaCorrection::Gamma1_0;
copy.half_scale = false;
copy.scale_invert = false;
copy.clear = true;
Expand Down
4 changes: 2 additions & 2 deletions Source/Core/VideoBackends/Null/TextureCache.h
Expand Up @@ -14,15 +14,15 @@ class TextureCache final : public TextureCacheBase
u32 bytes_per_row, u32 num_blocks_y, u32 memory_stride,
const MathUtil::Rectangle<int>& src_rect, bool scale_by_half, bool linear_filter,
float y_scale, float gamma, bool clamp_top, bool clamp_bottom,
const EFBCopyFilterCoefficients& filter_coefficients) override
const std::array<u32, 3>& filter_coefficients) override
{
}

void CopyEFBToCacheEntry(TCacheEntry* entry, bool is_depth_copy,
const MathUtil::Rectangle<int>& src_rect, bool scale_by_half,
bool linear_filter, EFBCopyFormat dst_format, bool is_intensity,
float gamma, bool clamp_top, bool clamp_bottom,
const EFBCopyFilterCoefficients& filter_coefficients) override
const std::array<u32, 3>& filter_coefficients) override
{
}
};
Expand Down
11 changes: 8 additions & 3 deletions Source/Core/VideoBackends/Software/EfbInterface.cpp
Expand Up @@ -535,9 +535,14 @@ static yuv444 ConvertColorToYUV(u32 color)

// GameCube/Wii uses the BT.601 standard algorithm for converting to YCbCr; see
// http://www.equasys.de/colorconversion.html#YCbCr-RGBColorFormatConversion
return {static_cast<u8>(0.257f * red + 0.504f * green + 0.098f * blue),
static_cast<s8>(-0.148f * red + -0.291f * green + 0.439f * blue),
static_cast<s8>(0.439f * red + -0.368f * green + -0.071f * blue)};
// These numbers were determined by hardware testing
const u16 y = +66 * red + 129 * green + +25 * blue;
const s16 u = -38 * red + -74 * green + 112 * blue;
const s16 v = 112 * red + -94 * green + -18 * blue;
const u8 y_round = static_cast<u8>((y >> 8) + ((y >> 7) & 1));
const s8 u_round = static_cast<s8>((u >> 8) + ((u >> 7) & 1));
const s8 v_round = static_cast<s8>((v >> 8) + ((v >> 7) & 1));
return {y_round, u_round, v_round};
}

u32 GetDepth(u16 x, u16 y)
Expand Down
4 changes: 2 additions & 2 deletions Source/Core/VideoBackends/Software/TextureCache.h
Expand Up @@ -14,7 +14,7 @@ class TextureCache : public TextureCacheBase
u32 bytes_per_row, u32 num_blocks_y, u32 memory_stride,
const MathUtil::Rectangle<int>& src_rect, bool scale_by_half, bool linear_filter,
float y_scale, float gamma, bool clamp_top, bool clamp_bottom,
const EFBCopyFilterCoefficients& filter_coefficients) override
const std::array<u32, 3>& filter_coefficients) override
{
TextureEncoder::Encode(dst, params, native_width, bytes_per_row, num_blocks_y, memory_stride,
src_rect, scale_by_half, y_scale, gamma);
Expand All @@ -23,7 +23,7 @@ class TextureCache : public TextureCacheBase
const MathUtil::Rectangle<int>& src_rect, bool scale_by_half,
bool linear_filter, EFBCopyFormat dst_format, bool is_intensity,
float gamma, bool clamp_top, bool clamp_bottom,
const EFBCopyFilterCoefficients& filter_coefficients) override
const std::array<u32, 3>& filter_coefficients) override
{
// TODO: If we ever want to "fake" vram textures, we would need to implement this
}
Expand Down
42 changes: 22 additions & 20 deletions Source/Core/VideoCommon/BPMemory.h
Expand Up @@ -2035,17 +2035,30 @@ struct fmt::formatter<FrameToField> : EnumFormatter<FrameToField::InterlacedOdd>
constexpr formatter() : EnumFormatter(names) {}
};

enum class GammaCorrection : u32
{
Gamma1_0 = 0,
Gamma1_7 = 1,
Gamma2_2 = 2,
// Hardware testing indicates this behaves the same as Gamma2_2
Invalid2_2 = 3,
};
template <>
struct fmt::formatter<GammaCorrection> : EnumFormatter<GammaCorrection::Invalid2_2>
{
constexpr formatter() : EnumFormatter({"1.0", "1.7", "2.2", "Invalid 2.2"}) {}
};

union UPE_Copy
{
u32 Hex;

BitField<0, 1, bool, u32> clamp_top; // if set clamp top
BitField<1, 1, bool, u32> clamp_bottom; // if set clamp bottom
BitField<2, 1, bool, u32> yuv; // if set, color conversion from RGB to YUV
BitField<0, 1, bool, u32> clamp_top; // if set clamp top
BitField<1, 1, bool, u32> clamp_bottom; // if set clamp bottom
BitField<2, 1, u32> unknown_bit;
BitField<3, 4, u32> target_pixel_format; // realformat is (fmt/2)+((fmt&1)*8).... for some reason
// the msb is the lsb (pattern: cycling right shift)
// gamma correction.. 0 = 1.0 ; 1 = 1.7 ; 2 = 2.2 ; 3 is reserved
BitField<7, 2, u32> gamma;
BitField<7, 2, GammaCorrection> gamma;
// "mipmap" filter... false = no filter (scale 1:1) ; true = box filter (scale 2:1)
BitField<9, 1, bool, u32> half_scale;
BitField<10, 1, bool, u32> scale_invert; // if set vertical scaling is on
Expand Down Expand Up @@ -2084,23 +2097,10 @@ struct fmt::formatter<UPE_Copy>
else
clamp = "None";
}
std::string_view gamma = "Invalid";
switch (copy.gamma)
{
case 0:
gamma = "1.0";
break;
case 1:
gamma = "1.7";
break;
case 2:
gamma = "2.2";
break;
}

return fmt::format_to(ctx.out(),
"Clamping: {}\n"
"Converting from RGB to YUV: {}\n"
"Unknown bit: {}\n"
"Target pixel format: {}\n"
"Gamma correction: {}\n"
"Half scale: {}\n"
Expand All @@ -2110,7 +2110,7 @@ struct fmt::formatter<UPE_Copy>
"Copy to XFB: {}\n"
"Intensity format: {}\n"
"Automatic color conversion: {}",
clamp, no_yes[copy.yuv], copy.tp_realFormat(), gamma,
clamp, copy.unknown_bit, copy.tp_realFormat(), copy.gamma,
no_yes[copy.half_scale], no_yes[copy.scale_invert], no_yes[copy.clear],
copy.frame_to_field, no_yes[copy.copy_to_xfb], no_yes[copy.intensity_fmt],
no_yes[copy.auto_conv]);
Expand All @@ -2123,10 +2123,12 @@ union CopyFilterCoefficients

u64 Hex;

BitField<0, 32, u32, u64> Low;
BitField<0, 6, u64> w0;
BitField<6, 6, u64> w1;
BitField<12, 6, u64> w2;
BitField<18, 6, u64> w3;
BitField<32, 32, u32, u64> High;
BitField<32, 6, u64> w4;
BitField<38, 6, u64> w5;
BitField<44, 6, u64> w6;
Expand Down
10 changes: 6 additions & 4 deletions Source/Core/VideoCommon/BPStructs.cpp
Expand Up @@ -11,6 +11,7 @@
#include <fmt/format.h>

#include "Common/CommonTypes.h"
#include "Common/EnumMap.h"
#include "Common/Logging/Log.h"

#include "Core/ConfigManager.h"
Expand Down Expand Up @@ -42,7 +43,8 @@

using namespace BPFunctions;

static const float s_gammaLUT[] = {1.0f, 1.7f, 2.2f, 1.0f};
static constexpr Common::EnumMap<float, GammaCorrection::Invalid2_2> s_gammaLUT = {1.0f, 1.7f, 2.2f,
2.2f};

void BPInit()
{
Expand Down Expand Up @@ -276,9 +278,9 @@ static void BPWritten(const BPCmd& bp, int cycles_into_future)
bool is_depth_copy = bpmem.zcontrol.pixel_format == PixelFormat::Z24;
g_texture_cache->CopyRenderTargetToTexture(
destAddr, PE_copy.tp_realFormat(), copy_width, copy_height, destStride, is_depth_copy,
srcRect, PE_copy.intensity_fmt, PE_copy.half_scale, 1.0f, 1.0f,
bpmem.triggerEFBCopy.clamp_top, bpmem.triggerEFBCopy.clamp_bottom,
bpmem.copyfilter.GetCoefficients());
srcRect, PE_copy.intensity_fmt && PE_copy.auto_conv, PE_copy.half_scale, 1.0f,
s_gammaLUT[PE_copy.gamma], bpmem.triggerEFBCopy.clamp_top,
bpmem.triggerEFBCopy.clamp_bottom, bpmem.copyfilter.GetCoefficients());
}
else
{
Expand Down
4 changes: 2 additions & 2 deletions Source/Core/VideoCommon/FramebufferShaderGen.cpp
Expand Up @@ -550,7 +550,7 @@ std::string GenerateTextureReinterpretShader(TextureFormat from_format, TextureF
break;

default:
WARN_LOG_FMT(VIDEO, "From format {} is not supported", static_cast<u32>(from_format));
WARN_LOG_FMT(VIDEO, "From format {} is not supported", from_format);
return "{}\n";
}

Expand Down Expand Up @@ -602,7 +602,7 @@ std::string GenerateTextureReinterpretShader(TextureFormat from_format, TextureF
}
break;
default:
WARN_LOG_FMT(VIDEO, "To format {} is not supported", static_cast<u32>(to_format));
WARN_LOG_FMT(VIDEO, "To format {} is not supported", to_format);
return "{}\n";
}

Expand Down
73 changes: 36 additions & 37 deletions Source/Core/VideoCommon/TextureCacheBase.cpp
Expand Up @@ -276,8 +276,7 @@ TextureCacheBase::ApplyPaletteToEntry(TCacheEntry* entry, const u8* palette, TLU
const AbstractPipeline* pipeline = g_shader_cache->GetPaletteConversionPipeline(tlutfmt);
if (!pipeline)
{
ERROR_LOG_FMT(VIDEO, "Failed to get conversion pipeline for format {:#04X}",
static_cast<u32>(tlutfmt));
ERROR_LOG_FMT(VIDEO, "Failed to get conversion pipeline for format {}", tlutfmt);
return nullptr;
}

Expand Down Expand Up @@ -345,9 +344,8 @@ TextureCacheBase::TCacheEntry* TextureCacheBase::ReinterpretEntry(const TCacheEn
g_shader_cache->GetTextureReinterpretPipeline(existing_entry->format.texfmt, new_format);
if (!pipeline)
{
ERROR_LOG_FMT(VIDEO,
"Failed to obtain texture reinterpreting pipeline from format {:#04X} to {:#04X}",
static_cast<u32>(existing_entry->format.texfmt), static_cast<u32>(new_format));
ERROR_LOG_FMT(VIDEO, "Failed to obtain texture reinterpreting pipeline from format {} to {}",
existing_entry->format.texfmt, new_format);
return nullptr;
}

Expand Down Expand Up @@ -1980,44 +1978,49 @@ void TextureCacheBase::StitchXFBCopy(TCacheEntry* stitched_entry)
}
}

EFBCopyFilterCoefficients
std::array<u32, 3>
TextureCacheBase::GetRAMCopyFilterCoefficients(const CopyFilterCoefficients::Values& coefficients)
{
// To simplify the backend, we precalculate the three coefficients in common. Coefficients 0, 1
// are for the row above, 2, 3, 4 are for the current pixel, and 5, 6 are for the row below.
return EFBCopyFilterCoefficients{
static_cast<float>(static_cast<u32>(coefficients[0]) + static_cast<u32>(coefficients[1])) /
64.0f,
static_cast<float>(static_cast<u32>(coefficients[2]) + static_cast<u32>(coefficients[3]) +
static_cast<u32>(coefficients[4])) /
64.0f,
static_cast<float>(static_cast<u32>(coefficients[5]) + static_cast<u32>(coefficients[6])) /
64.0f,
return {
static_cast<u32>(coefficients[0]) + static_cast<u32>(coefficients[1]),
static_cast<u32>(coefficients[2]) + static_cast<u32>(coefficients[3]) +
static_cast<u32>(coefficients[4]),
static_cast<u32>(coefficients[5]) + static_cast<u32>(coefficients[6]),
};
}

EFBCopyFilterCoefficients
std::array<u32, 3>
TextureCacheBase::GetVRAMCopyFilterCoefficients(const CopyFilterCoefficients::Values& coefficients)
{
// If the user disables the copy filter, only apply it to the VRAM copy.
// This way games which are sensitive to changes to the RAM copy of the XFB will be unaffected.
EFBCopyFilterCoefficients res = GetRAMCopyFilterCoefficients(coefficients);
std::array<u32, 3> res = GetRAMCopyFilterCoefficients(coefficients);
if (!g_ActiveConfig.bDisableCopyFilter)
return res;

// Disabling the copy filter in options should not ignore the values the game sets completely,
// as some games use the filter coefficients to control the brightness of the screen. Instead,
// add all coefficients to the middle sample, so the deflicker/vertical filter has no effect.
res.middle = res.upper + res.middle + res.lower;
res.upper = 0.0f;
res.lower = 0.0f;
res[1] = res[0] + res[1] + res[2];
res[0] = 0;
res[2] = 0;
return res;
}

bool TextureCacheBase::NeedsCopyFilterInShader(const EFBCopyFilterCoefficients& coefficients)
bool TextureCacheBase::AllCopyFilterCoefsNeeded(const std::array<u32, 3>& coefficients)
{
// If the top/bottom coefficients are zero, no point sampling/blending from these rows.
return coefficients.upper != 0 || coefficients.lower != 0;
return coefficients[0] != 0 || coefficients[2] != 0;
}

bool TextureCacheBase::CopyFilterCanOverflow(const std::array<u32, 3>& coefficients)
{
// Normally, the copy filter coefficients will sum to at most 64. If the sum is higher than that,
// colors are clamped to the range [0, 255], but if the sum is higher than 128, that clamping
// breaks (as colors end up >= 512, which wraps back to 0).
return coefficients[0] + coefficients[1] + coefficients[2] >= 128;
}

void TextureCacheBase::CopyRenderTargetToTexture(
Expand Down Expand Up @@ -2257,10 +2260,11 @@ void TextureCacheBase::CopyRenderTargetToTexture(

if (copy_to_ram)
{
EFBCopyFilterCoefficients coefficients = GetRAMCopyFilterCoefficients(filter_coefficients);
const std::array<u32, 3> coefficients = GetRAMCopyFilterCoefficients(filter_coefficients);
PixelFormat srcFormat = bpmem.zcontrol.pixel_format;
EFBCopyParams format(srcFormat, dstFormat, is_depth_copy, isIntensity,
NeedsCopyFilterInShader(coefficients));
AllCopyFilterCoefsNeeded(coefficients),
CopyFilterCanOverflow(coefficients), gamma != 1.0);

std::unique_ptr<AbstractStagingTexture> staging_texture = GetEFBCopyStagingTexture();
if (staging_texture)
Expand Down Expand Up @@ -2718,16 +2722,15 @@ void TextureCacheBase::CopyEFBToCacheEntry(TCacheEntry* entry, bool is_depth_cop
bool scale_by_half, bool linear_filter,
EFBCopyFormat dst_format, bool is_intensity, float gamma,
bool clamp_top, bool clamp_bottom,
const EFBCopyFilterCoefficients& filter_coefficients)
const std::array<u32, 3>& filter_coefficients)
{
// Flush EFB pokes first, as they're expected to be included.
g_framebuffer_manager->FlushEFBPokes();

// Get the pipeline which we will be using. If the compilation failed, this will be null.
const AbstractPipeline* copy_pipeline =
g_shader_cache->GetEFBCopyToVRAMPipeline(TextureConversionShaderGen::GetShaderUid(
dst_format, is_depth_copy, is_intensity, scale_by_half,
NeedsCopyFilterInShader(filter_coefficients)));
const AbstractPipeline* copy_pipeline = g_shader_cache->GetEFBCopyToVRAMPipeline(
TextureConversionShaderGen::GetShaderUid(dst_format, is_depth_copy, is_intensity,
scale_by_half, 1.0f / gamma, filter_coefficients));
if (!copy_pipeline)
{
WARN_LOG_FMT(VIDEO, "Skipping EFB copy to VRAM due to missing pipeline.");
Expand All @@ -2748,7 +2751,7 @@ void TextureCacheBase::CopyEFBToCacheEntry(TCacheEntry* entry, bool is_depth_cop
struct Uniforms
{
float src_left, src_top, src_width, src_height;
float filter_coefficients[3];
std::array<u32, 3> filter_coefficients;
float gamma_rcp;
float clamp_top;
float clamp_bottom;
Expand All @@ -2763,9 +2766,7 @@ void TextureCacheBase::CopyEFBToCacheEntry(TCacheEntry* entry, bool is_depth_cop
uniforms.src_top = framebuffer_rect.top * rcp_efb_height;
uniforms.src_width = framebuffer_rect.GetWidth() * rcp_efb_width;
uniforms.src_height = framebuffer_rect.GetHeight() * rcp_efb_height;
uniforms.filter_coefficients[0] = filter_coefficients.upper;
uniforms.filter_coefficients[1] = filter_coefficients.middle;
uniforms.filter_coefficients[2] = filter_coefficients.lower;
uniforms.filter_coefficients = filter_coefficients;
uniforms.gamma_rcp = 1.0f / gamma;
// NOTE: when the clamp bits aren't set, the hardware will happily read beyond the EFB,
// which returns random garbage from the empty bus (confirmed by hardware tests).
Expand Down Expand Up @@ -2797,7 +2798,7 @@ void TextureCacheBase::CopyEFB(AbstractStagingTexture* dst, const EFBCopyParams&
u32 memory_stride, const MathUtil::Rectangle<int>& src_rect,
bool scale_by_half, bool linear_filter, float y_scale, float gamma,
bool clamp_top, bool clamp_bottom,
const EFBCopyFilterCoefficients& filter_coefficients)
const std::array<u32, 3>& filter_coefficients)
{
// Flush EFB pokes first, as they're expected to be included.
g_framebuffer_manager->FlushEFBPokes();
Expand Down Expand Up @@ -2828,7 +2829,7 @@ void TextureCacheBase::CopyEFB(AbstractStagingTexture* dst, const EFBCopyParams&
float gamma_rcp;
float clamp_top;
float clamp_bottom;
float filter_coefficients[3];
std::array<u32, 3> filter_coefficients;
u32 padding;
};
Uniforms encoder_params;
Expand All @@ -2849,9 +2850,7 @@ void TextureCacheBase::CopyEFB(AbstractStagingTexture* dst, const EFBCopyParams&
encoder_params.clamp_top = (static_cast<float>(top_coord) + .5f) * rcp_efb_height;
const u32 bottom_coord = (clamp_bottom ? framebuffer_rect.bottom : efb_height) - 1;
encoder_params.clamp_bottom = (static_cast<float>(bottom_coord) + .5f) * rcp_efb_height;
encoder_params.filter_coefficients[0] = filter_coefficients.upper;
encoder_params.filter_coefficients[1] = filter_coefficients.middle;
encoder_params.filter_coefficients[2] = filter_coefficients.lower;
encoder_params.filter_coefficients = filter_coefficients;
g_vertex_manager->UploadUtilityUniforms(&encoder_params, sizeof(encoder_params));

// Because the shader uses gl_FragCoord and we read it back, we must render to the lower-left.
Expand Down

0 comments on commit f1d23ff

Please sign in to comment.