dolphin-emu · JMC47 · Jul 17, 2022 · Feb 23, 2022 · Feb 23, 2022 · Feb 23, 2022
diff --git a/Source/Core/Core/FifoPlayer/FifoPlayer.cpp b/Source/Core/Core/FifoPlayer/FifoPlayer.cpp
@@ -591,9 +591,9 @@ void FifoPlayer::ClearEfb()
   UPE_Copy copy = bpmem.triggerEFBCopy;
   copy.clamp_top = false;
   copy.clamp_bottom = false;
-  copy.yuv = false;
+  copy.unknown_bit = false;
   copy.target_pixel_format = static_cast<u32>(EFBCopyFormat::RGBA8) << 1;
-  copy.gamma = 0;
+  copy.gamma = GammaCorrection::Gamma1_0;
   copy.half_scale = false;
   copy.scale_invert = false;
   copy.clear = true;

diff --git a/Source/Core/VideoBackends/Null/TextureCache.h b/Source/Core/VideoBackends/Null/TextureCache.h
@@ -14,15 +14,15 @@ class TextureCache final : public TextureCacheBase
                u32 bytes_per_row, u32 num_blocks_y, u32 memory_stride,
                const MathUtil::Rectangle<int>& src_rect, bool scale_by_half, bool linear_filter,
                float y_scale, float gamma, bool clamp_top, bool clamp_bottom,
-               const EFBCopyFilterCoefficients& filter_coefficients) override
+               const std::array<u32, 3>& filter_coefficients) override
   {
   }
 
   void CopyEFBToCacheEntry(TCacheEntry* entry, bool is_depth_copy,
                            const MathUtil::Rectangle<int>& src_rect, bool scale_by_half,
                            bool linear_filter, EFBCopyFormat dst_format, bool is_intensity,
                            float gamma, bool clamp_top, bool clamp_bottom,
-                           const EFBCopyFilterCoefficients& filter_coefficients) override
+                           const std::array<u32, 3>& filter_coefficients) override
   {
   }
 };

diff --git a/Source/Core/VideoBackends/Software/EfbInterface.cpp b/Source/Core/VideoBackends/Software/EfbInterface.cpp
@@ -535,9 +535,14 @@ static yuv444 ConvertColorToYUV(u32 color)
 
   // GameCube/Wii uses the BT.601 standard algorithm for converting to YCbCr; see
   // http://www.equasys.de/colorconversion.html#YCbCr-RGBColorFormatConversion
-  return {static_cast<u8>(0.257f * red + 0.504f * green + 0.098f * blue),
-          static_cast<s8>(-0.148f * red + -0.291f * green + 0.439f * blue),
-          static_cast<s8>(0.439f * red + -0.368f * green + -0.071f * blue)};
+  // These numbers were determined by hardware testing
+  const u16 y = +66 * red + 129 * green + +25 * blue;
+  const s16 u = -38 * red + -74 * green + 112 * blue;
+  const s16 v = 112 * red + -94 * green + -18 * blue;
+  const u8 y_round = static_cast<u8>((y >> 8) + ((y >> 7) & 1));
+  const s8 u_round = static_cast<s8>((u >> 8) + ((u >> 7) & 1));
+  const s8 v_round = static_cast<s8>((v >> 8) + ((v >> 7) & 1));
+  return {y_round, u_round, v_round};
 }
 
 u32 GetDepth(u16 x, u16 y)

diff --git a/Source/Core/VideoBackends/Software/TextureCache.h b/Source/Core/VideoBackends/Software/TextureCache.h
@@ -14,7 +14,7 @@ class TextureCache : public TextureCacheBase
                u32 bytes_per_row, u32 num_blocks_y, u32 memory_stride,
                const MathUtil::Rectangle<int>& src_rect, bool scale_by_half, bool linear_filter,
                float y_scale, float gamma, bool clamp_top, bool clamp_bottom,
-               const EFBCopyFilterCoefficients& filter_coefficients) override
+               const std::array<u32, 3>& filter_coefficients) override
   {
     TextureEncoder::Encode(dst, params, native_width, bytes_per_row, num_blocks_y, memory_stride,
                            src_rect, scale_by_half, y_scale, gamma);
@@ -23,7 +23,7 @@ class TextureCache : public TextureCacheBase
                            const MathUtil::Rectangle<int>& src_rect, bool scale_by_half,
                            bool linear_filter, EFBCopyFormat dst_format, bool is_intensity,
                            float gamma, bool clamp_top, bool clamp_bottom,
-                           const EFBCopyFilterCoefficients& filter_coefficients) override
+                           const std::array<u32, 3>& filter_coefficients) override
   {
     // TODO: If we ever want to "fake" vram textures, we would need to implement this
   }

diff --git a/Source/Core/VideoCommon/BPMemory.h b/Source/Core/VideoCommon/BPMemory.h
@@ -2035,17 +2035,30 @@ struct fmt::formatter<FrameToField> : EnumFormatter<FrameToField::InterlacedOdd>
   constexpr formatter() : EnumFormatter(names) {}
 };
 
+enum class GammaCorrection : u32
+{
+  Gamma1_0 = 0,
+  Gamma1_7 = 1,
+  Gamma2_2 = 2,
+  // Hardware testing indicates this behaves the same as Gamma2_2
+  Invalid2_2 = 3,
+};
+template <>
+struct fmt::formatter<GammaCorrection> : EnumFormatter<GammaCorrection::Invalid2_2>
+{
+  constexpr formatter() : EnumFormatter({"1.0", "1.7", "2.2", "Invalid 2.2"}) {}
+};
+
 union UPE_Copy
 {
   u32 Hex;
 
-  BitField<0, 1, bool, u32> clamp_top;      // if set clamp top
-  BitField<1, 1, bool, u32> clamp_bottom;   // if set clamp bottom
-  BitField<2, 1, bool, u32> yuv;            // if set, color conversion from RGB to YUV
+  BitField<0, 1, bool, u32> clamp_top;     // if set clamp top
+  BitField<1, 1, bool, u32> clamp_bottom;  // if set clamp bottom
+  BitField<2, 1, u32> unknown_bit;
   BitField<3, 4, u32> target_pixel_format;  // realformat is (fmt/2)+((fmt&1)*8).... for some reason
                                             // the msb is the lsb (pattern: cycling right shift)
-  // gamma correction.. 0 = 1.0 ; 1 = 1.7 ; 2 = 2.2 ; 3 is reserved
-  BitField<7, 2, u32> gamma;
+  BitField<7, 2, GammaCorrection> gamma;
   // "mipmap" filter... false = no filter (scale 1:1) ; true = box filter (scale 2:1)
   BitField<9, 1, bool, u32> half_scale;
   BitField<10, 1, bool, u32> scale_invert;  // if set vertical scaling is on
@@ -2084,23 +2097,10 @@ struct fmt::formatter<UPE_Copy>
       else
         clamp = "None";
     }
-    std::string_view gamma = "Invalid";
-    switch (copy.gamma)
-    {
-    case 0:
-      gamma = "1.0";
-      break;
-    case 1:
-      gamma = "1.7";
-      break;
-    case 2:
-      gamma = "2.2";
-      break;
-    }
 
     return fmt::format_to(ctx.out(),
                           "Clamping: {}\n"
-                          "Converting from RGB to YUV: {}\n"
+                          "Unknown bit: {}\n"
                           "Target pixel format: {}\n"
                           "Gamma correction: {}\n"
                           "Half scale: {}\n"
@@ -2110,7 +2110,7 @@ struct fmt::formatter<UPE_Copy>
                           "Copy to XFB: {}\n"
                           "Intensity format: {}\n"
                           "Automatic color conversion: {}",
-                          clamp, no_yes[copy.yuv], copy.tp_realFormat(), gamma,
+                          clamp, copy.unknown_bit, copy.tp_realFormat(), copy.gamma,
                           no_yes[copy.half_scale], no_yes[copy.scale_invert], no_yes[copy.clear],
                           copy.frame_to_field, no_yes[copy.copy_to_xfb], no_yes[copy.intensity_fmt],
                           no_yes[copy.auto_conv]);
@@ -2123,10 +2123,12 @@ union CopyFilterCoefficients
 
   u64 Hex;
 
+  BitField<0, 32, u32, u64> Low;
   BitField<0, 6, u64> w0;
   BitField<6, 6, u64> w1;
   BitField<12, 6, u64> w2;
   BitField<18, 6, u64> w3;
+  BitField<32, 32, u32, u64> High;
   BitField<32, 6, u64> w4;
   BitField<38, 6, u64> w5;
   BitField<44, 6, u64> w6;

diff --git a/Source/Core/VideoCommon/BPStructs.cpp b/Source/Core/VideoCommon/BPStructs.cpp
@@ -11,6 +11,7 @@
 #include <fmt/format.h>
 
 #include "Common/CommonTypes.h"
+#include "Common/EnumMap.h"
 #include "Common/Logging/Log.h"
 
 #include "Core/ConfigManager.h"
@@ -42,7 +43,8 @@
 
 using namespace BPFunctions;
 
-static const float s_gammaLUT[] = {1.0f, 1.7f, 2.2f, 1.0f};
+static constexpr Common::EnumMap<float, GammaCorrection::Invalid2_2> s_gammaLUT = {1.0f, 1.7f, 2.2f,
+                                                                                   2.2f};
 
 void BPInit()
 {
@@ -276,9 +278,9 @@ static void BPWritten(const BPCmd& bp, int cycles_into_future)
       bool is_depth_copy = bpmem.zcontrol.pixel_format == PixelFormat::Z24;
       g_texture_cache->CopyRenderTargetToTexture(
           destAddr, PE_copy.tp_realFormat(), copy_width, copy_height, destStride, is_depth_copy,
-          srcRect, PE_copy.intensity_fmt, PE_copy.half_scale, 1.0f, 1.0f,
-          bpmem.triggerEFBCopy.clamp_top, bpmem.triggerEFBCopy.clamp_bottom,
-          bpmem.copyfilter.GetCoefficients());
+          srcRect, PE_copy.intensity_fmt && PE_copy.auto_conv, PE_copy.half_scale, 1.0f,
+          s_gammaLUT[PE_copy.gamma], bpmem.triggerEFBCopy.clamp_top,
+          bpmem.triggerEFBCopy.clamp_bottom, bpmem.copyfilter.GetCoefficients());
     }
     else
     {

diff --git a/Source/Core/VideoCommon/FramebufferShaderGen.cpp b/Source/Core/VideoCommon/FramebufferShaderGen.cpp
@@ -550,7 +550,7 @@ std::string GenerateTextureReinterpretShader(TextureFormat from_format, TextureF
   break;
 
   default:
-    WARN_LOG_FMT(VIDEO, "From format {} is not supported", static_cast<u32>(from_format));
+    WARN_LOG_FMT(VIDEO, "From format {} is not supported", from_format);
     return "{}\n";
   }
 
@@ -602,7 +602,7 @@ std::string GenerateTextureReinterpretShader(TextureFormat from_format, TextureF
   }
   break;
   default:
-    WARN_LOG_FMT(VIDEO, "To format {} is not supported", static_cast<u32>(to_format));
+    WARN_LOG_FMT(VIDEO, "To format {} is not supported", to_format);
     return "{}\n";
   }
 

diff --git a/Source/Core/VideoCommon/TextureCacheBase.cpp b/Source/Core/VideoCommon/TextureCacheBase.cpp
@@ -276,8 +276,7 @@ TextureCacheBase::ApplyPaletteToEntry(TCacheEntry* entry, const u8* palette, TLU
   const AbstractPipeline* pipeline = g_shader_cache->GetPaletteConversionPipeline(tlutfmt);
   if (!pipeline)
   {
-    ERROR_LOG_FMT(VIDEO, "Failed to get conversion pipeline for format {:#04X}",
-                  static_cast<u32>(tlutfmt));
+    ERROR_LOG_FMT(VIDEO, "Failed to get conversion pipeline for format {}", tlutfmt);
     return nullptr;
   }
 
@@ -345,9 +344,8 @@ TextureCacheBase::TCacheEntry* TextureCacheBase::ReinterpretEntry(const TCacheEn
       g_shader_cache->GetTextureReinterpretPipeline(existing_entry->format.texfmt, new_format);
   if (!pipeline)
   {
-    ERROR_LOG_FMT(VIDEO,
-                  "Failed to obtain texture reinterpreting pipeline from format {:#04X} to {:#04X}",
-                  static_cast<u32>(existing_entry->format.texfmt), static_cast<u32>(new_format));
+    ERROR_LOG_FMT(VIDEO, "Failed to obtain texture reinterpreting pipeline from format {} to {}",
+                  existing_entry->format.texfmt, new_format);
     return nullptr;
   }
 
@@ -1980,44 +1978,49 @@ void TextureCacheBase::StitchXFBCopy(TCacheEntry* stitched_entry)
   }
 }
 
-EFBCopyFilterCoefficients
+std::array<u32, 3>
 TextureCacheBase::GetRAMCopyFilterCoefficients(const CopyFilterCoefficients::Values& coefficients)
 {
   // To simplify the backend, we precalculate the three coefficients in common. Coefficients 0, 1
   // are for the row above, 2, 3, 4 are for the current pixel, and 5, 6 are for the row below.
-  return EFBCopyFilterCoefficients{
-      static_cast<float>(static_cast<u32>(coefficients[0]) + static_cast<u32>(coefficients[1])) /
-          64.0f,
-      static_cast<float>(static_cast<u32>(coefficients[2]) + static_cast<u32>(coefficients[3]) +
-                         static_cast<u32>(coefficients[4])) /
-          64.0f,
-      static_cast<float>(static_cast<u32>(coefficients[5]) + static_cast<u32>(coefficients[6])) /
-          64.0f,
+  return {
+      static_cast<u32>(coefficients[0]) + static_cast<u32>(coefficients[1]),
+      static_cast<u32>(coefficients[2]) + static_cast<u32>(coefficients[3]) +
+          static_cast<u32>(coefficients[4]),
+      static_cast<u32>(coefficients[5]) + static_cast<u32>(coefficients[6]),
   };
 }
 
-EFBCopyFilterCoefficients
+std::array<u32, 3>
 TextureCacheBase::GetVRAMCopyFilterCoefficients(const CopyFilterCoefficients::Values& coefficients)
 {
   // If the user disables the copy filter, only apply it to the VRAM copy.
   // This way games which are sensitive to changes to the RAM copy of the XFB will be unaffected.
-  EFBCopyFilterCoefficients res = GetRAMCopyFilterCoefficients(coefficients);
+  std::array<u32, 3> res = GetRAMCopyFilterCoefficients(coefficients);
   if (!g_ActiveConfig.bDisableCopyFilter)
     return res;
 
   // Disabling the copy filter in options should not ignore the values the game sets completely,
   // as some games use the filter coefficients to control the brightness of the screen. Instead,
   // add all coefficients to the middle sample, so the deflicker/vertical filter has no effect.
-  res.middle = res.upper + res.middle + res.lower;
-  res.upper = 0.0f;
-  res.lower = 0.0f;
+  res[1] = res[0] + res[1] + res[2];
+  res[0] = 0;
+  res[2] = 0;
   return res;
 }
 
-bool TextureCacheBase::NeedsCopyFilterInShader(const EFBCopyFilterCoefficients& coefficients)
+bool TextureCacheBase::AllCopyFilterCoefsNeeded(const std::array<u32, 3>& coefficients)
 {
   // If the top/bottom coefficients are zero, no point sampling/blending from these rows.
-  return coefficients.upper != 0 || coefficients.lower != 0;
+  return coefficients[0] != 0 || coefficients[2] != 0;
+}
+
+bool TextureCacheBase::CopyFilterCanOverflow(const std::array<u32, 3>& coefficients)
+{
+  // Normally, the copy filter coefficients will sum to at most 64.  If the sum is higher than that,
+  // colors are clamped to the range [0, 255], but if the sum is higher than 128, that clamping
+  // breaks (as colors end up >= 512, which wraps back to 0).
+  return coefficients[0] + coefficients[1] + coefficients[2] >= 128;
 }
 
 void TextureCacheBase::CopyRenderTargetToTexture(
@@ -2257,10 +2260,11 @@ void TextureCacheBase::CopyRenderTargetToTexture(
 
   if (copy_to_ram)
   {
-    EFBCopyFilterCoefficients coefficients = GetRAMCopyFilterCoefficients(filter_coefficients);
+    const std::array<u32, 3> coefficients = GetRAMCopyFilterCoefficients(filter_coefficients);
     PixelFormat srcFormat = bpmem.zcontrol.pixel_format;
     EFBCopyParams format(srcFormat, dstFormat, is_depth_copy, isIntensity,
-                         NeedsCopyFilterInShader(coefficients));
+                         AllCopyFilterCoefsNeeded(coefficients),
+                         CopyFilterCanOverflow(coefficients), gamma != 1.0);
 
     std::unique_ptr<AbstractStagingTexture> staging_texture = GetEFBCopyStagingTexture();
     if (staging_texture)
@@ -2718,16 +2722,15 @@ void TextureCacheBase::CopyEFBToCacheEntry(TCacheEntry* entry, bool is_depth_cop
                                            bool scale_by_half, bool linear_filter,
                                            EFBCopyFormat dst_format, bool is_intensity, float gamma,
                                            bool clamp_top, bool clamp_bottom,
-                                           const EFBCopyFilterCoefficients& filter_coefficients)
+                                           const std::array<u32, 3>& filter_coefficients)
 {
   // Flush EFB pokes first, as they're expected to be included.
   g_framebuffer_manager->FlushEFBPokes();
 
   // Get the pipeline which we will be using. If the compilation failed, this will be null.
-  const AbstractPipeline* copy_pipeline =
-      g_shader_cache->GetEFBCopyToVRAMPipeline(TextureConversionShaderGen::GetShaderUid(
-          dst_format, is_depth_copy, is_intensity, scale_by_half,
-          NeedsCopyFilterInShader(filter_coefficients)));
+  const AbstractPipeline* copy_pipeline = g_shader_cache->GetEFBCopyToVRAMPipeline(
+      TextureConversionShaderGen::GetShaderUid(dst_format, is_depth_copy, is_intensity,
+                                               scale_by_half, 1.0f / gamma, filter_coefficients));
   if (!copy_pipeline)
   {
     WARN_LOG_FMT(VIDEO, "Skipping EFB copy to VRAM due to missing pipeline.");
@@ -2748,7 +2751,7 @@ void TextureCacheBase::CopyEFBToCacheEntry(TCacheEntry* entry, bool is_depth_cop
   struct Uniforms
   {
     float src_left, src_top, src_width, src_height;
-    float filter_coefficients[3];
+    std::array<u32, 3> filter_coefficients;
     float gamma_rcp;
     float clamp_top;
     float clamp_bottom;
@@ -2763,9 +2766,7 @@ void TextureCacheBase::CopyEFBToCacheEntry(TCacheEntry* entry, bool is_depth_cop
   uniforms.src_top = framebuffer_rect.top * rcp_efb_height;
   uniforms.src_width = framebuffer_rect.GetWidth() * rcp_efb_width;
   uniforms.src_height = framebuffer_rect.GetHeight() * rcp_efb_height;
-  uniforms.filter_coefficients[0] = filter_coefficients.upper;
-  uniforms.filter_coefficients[1] = filter_coefficients.middle;
-  uniforms.filter_coefficients[2] = filter_coefficients.lower;
+  uniforms.filter_coefficients = filter_coefficients;
   uniforms.gamma_rcp = 1.0f / gamma;
   //   NOTE: when the clamp bits aren't set, the hardware will happily read beyond the EFB,
   //         which returns random garbage from the empty bus (confirmed by hardware tests).
@@ -2797,7 +2798,7 @@ void TextureCacheBase::CopyEFB(AbstractStagingTexture* dst, const EFBCopyParams&
                                u32 memory_stride, const MathUtil::Rectangle<int>& src_rect,
                                bool scale_by_half, bool linear_filter, float y_scale, float gamma,
                                bool clamp_top, bool clamp_bottom,
-                               const EFBCopyFilterCoefficients& filter_coefficients)
+                               const std::array<u32, 3>& filter_coefficients)
 {
   // Flush EFB pokes first, as they're expected to be included.
   g_framebuffer_manager->FlushEFBPokes();
@@ -2828,7 +2829,7 @@ void TextureCacheBase::CopyEFB(AbstractStagingTexture* dst, const EFBCopyParams&
     float gamma_rcp;
     float clamp_top;
     float clamp_bottom;
-    float filter_coefficients[3];
+    std::array<u32, 3> filter_coefficients;
     u32 padding;
   };
   Uniforms encoder_params;
@@ -2849,9 +2850,7 @@ void TextureCacheBase::CopyEFB(AbstractStagingTexture* dst, const EFBCopyParams&
   encoder_params.clamp_top = (static_cast<float>(top_coord) + .5f) * rcp_efb_height;
   const u32 bottom_coord = (clamp_bottom ? framebuffer_rect.bottom : efb_height) - 1;
   encoder_params.clamp_bottom = (static_cast<float>(bottom_coord) + .5f) * rcp_efb_height;
-  encoder_params.filter_coefficients[0] = filter_coefficients.upper;
-  encoder_params.filter_coefficients[1] = filter_coefficients.middle;
-  encoder_params.filter_coefficients[2] = filter_coefficients.lower;
+  encoder_params.filter_coefficients = filter_coefficients;
   g_vertex_manager->UploadUtilityUniforms(&encoder_params, sizeof(encoder_params));
 
   // Because the shader uses gl_FragCoord and we read it back, we must render to the lower-left.