@@ -33,10 +33,12 @@ static std::unique_ptr<PSTextureEncoder> g_encoder;

void TextureCache::CopyEFB(u8* dst, const EFBCopyParams& params, u32 native_width,
u32 bytes_per_row, u32 num_blocks_y, u32 memory_stride,
const EFBRectangle& src_rect, bool scale_by_half)
const EFBRectangle& src_rect, bool scale_by_half, float y_scale,
float gamma, bool clamp_top, bool clamp_bottom,
const CopyFilterCoefficientArray& filter_coefficients)
{
g_encoder->Encode(dst, params, native_width, bytes_per_row, num_blocks_y, memory_stride, src_rect,
scale_by_half);
scale_by_half, y_scale, gamma, clamp_top, clamp_bottom, filter_coefficients);
}

const char palette_shader[] =
@@ -137,9 +139,9 @@ void TextureCache::ConvertTexture(TCacheEntry* destination, TCacheEntry* source,
D3D::stateman->SetTexture(1, palette_buf_srv);

// TODO: Add support for C14X2 format. (Different multiplier, more palette entries.)
float params[4] = {source->format == TextureFormat::I4 ? 15.f : 255.f};
D3D::context->UpdateSubresource(palette_uniform, 0, nullptr, &params, 0, 0);
D3D::stateman->SetPixelConstants(palette_uniform);
float params[8] = {source->format == TextureFormat::I4 ? 15.f : 255.f};
D3D::context->UpdateSubresource(uniform_buffer, 0, nullptr, &params, 0, 0);
D3D::stateman->SetPixelConstants(uniform_buffer);

const D3D11_RECT sourcerect = CD3D11_RECT(0, 0, source->GetWidth(), source->GetHeight());

@@ -180,7 +182,7 @@ TextureCache::TextureCache()

palette_buf = nullptr;
palette_buf_srv = nullptr;
palette_uniform = nullptr;
uniform_buffer = nullptr;
palette_pixel_shader[static_cast<int>(TLUTFormat::IA8)] = GetConvertShader("IA8");
palette_pixel_shader[static_cast<int>(TLUTFormat::RGB565)] = GetConvertShader("RGB565");
palette_pixel_shader[static_cast<int>(TLUTFormat::RGB5A3)] = GetConvertShader("RGB5A3");
@@ -195,10 +197,10 @@ TextureCache::TextureCache()
CHECK(SUCCEEDED(hr), "create palette decoder lut srv");
D3D::SetDebugObjectName(palette_buf_srv, "texture decoder lut srv");
const D3D11_BUFFER_DESC cbdesc =
CD3D11_BUFFER_DESC(16, D3D11_BIND_CONSTANT_BUFFER, D3D11_USAGE_DEFAULT);
hr = D3D::device->CreateBuffer(&cbdesc, nullptr, &palette_uniform);
CD3D11_BUFFER_DESC(sizeof(float) * 8, D3D11_BIND_CONSTANT_BUFFER, D3D11_USAGE_DEFAULT);
hr = D3D::device->CreateBuffer(&cbdesc, nullptr, &uniform_buffer);
CHECK(SUCCEEDED(hr), "Create palette decoder constant buffer");
D3D::SetDebugObjectName(palette_uniform,
D3D::SetDebugObjectName(uniform_buffer,
"a constant buffer used in TextureCache::CopyRenderTargetToTexture");
}

@@ -209,7 +211,7 @@ TextureCache::~TextureCache()

SAFE_RELEASE(palette_buf);
SAFE_RELEASE(palette_buf_srv);
SAFE_RELEASE(palette_uniform);
SAFE_RELEASE(uniform_buffer);
for (auto*& shader : palette_pixel_shader)
SAFE_RELEASE(shader);
for (auto& iter : m_efb_to_tex_pixel_shaders)
@@ -218,7 +220,9 @@ TextureCache::~TextureCache()

void TextureCache::CopyEFBToCacheEntry(TCacheEntry* entry, bool is_depth_copy,
const EFBRectangle& src_rect, bool scale_by_half,
EFBCopyFormat dst_format, bool is_intensity)
EFBCopyFormat dst_format, bool is_intensity, float gamma,
bool clamp_top, bool clamp_bottom,
const CopyFilterCoefficientArray& filter_coefficients)
{
auto* destination_texture = static_cast<DXTexture*>(entry->texture.get());

@@ -260,6 +264,27 @@ void TextureCache::CopyEFBToCacheEntry(TCacheEntry* entry, bool is_depth_copy,
else
D3D::SetPointCopySampler();

struct PixelConstants
{
float filter_coefficients[3];
float gamma_rcp;
float clamp_top;
float clamp_bottom;
float pixel_height;
u32 padding;
};
PixelConstants constants;
for (size_t i = 0; i < filter_coefficients.size(); i++)
constants.filter_coefficients[i] = filter_coefficients[i] / 64.0f;
constants.gamma_rcp = 1.0f / gamma;
constants.clamp_top = clamp_top ? src_rect.top / float(EFB_HEIGHT) : 0.0f;
constants.clamp_bottom = clamp_bottom ? src_rect.bottom / float(EFB_HEIGHT) : 1.0f;
constants.pixel_height =
g_ActiveConfig.bCopyEFBScaled ? 1.0f / g_renderer->GetTargetHeight() : 1.0f / EFB_HEIGHT;
constants.padding = 0;
D3D::context->UpdateSubresource(uniform_buffer, 0, nullptr, &constants, 0, 0);
D3D::stateman->SetPixelConstants(uniform_buffer);

// Make sure we don't draw with the texture set as both a source and target.
// (This can happen because we don't unbind textures when we free them.)
D3D::stateman->UnsetTexture(destination_texture->GetRawTexIdentifier()->GetSRV());
@@ -34,19 +34,21 @@ class TextureCache : public TextureCacheBase

void CopyEFB(u8* dst, const EFBCopyParams& params, u32 native_width, u32 bytes_per_row,
u32 num_blocks_y, u32 memory_stride, const EFBRectangle& src_rect,
bool scale_by_half) override;
bool scale_by_half, float y_scale, float gamma, bool clamp_top, bool clamp_bottom,
const CopyFilterCoefficientArray& filter_coefficients) override;

void CopyEFBToCacheEntry(TCacheEntry* entry, bool is_depth_copy, const EFBRectangle& src_rect,
bool scale_by_half, EFBCopyFormat dst_format,
bool is_intensity) override;
bool scale_by_half, EFBCopyFormat dst_format, bool is_intensity,
float gamma, bool clamp_top, bool clamp_bottom,
const CopyFilterCoefficientArray& filter_coefficients) override;

bool CompileShaders() override { return true; }
void DeleteShaders() override {}
ID3D11PixelShader* GetEFBToTexPixelShader(const TextureConversionShaderGen::TCShaderUid& uid);

ID3D11Buffer* palette_buf;
ID3D11ShaderResourceView* palette_buf_srv;
ID3D11Buffer* palette_uniform;
ID3D11Buffer* uniform_buffer;
ID3D11PixelShader* palette_pixel_shader[3];

std::map<TextureConversionShaderGen::TCShaderUid, ID3D11PixelShader*> m_efb_to_tex_pixel_shaders;
@@ -92,7 +92,7 @@ TargetRectangle Renderer::ConvertEFBRectangle(const EFBRectangle& rc)
return result;
}

void Renderer::SwapImpl(AbstractTexture*, const EFBRectangle&, u64, float)
void Renderer::SwapImpl(AbstractTexture*, const EFBRectangle&, u64)
{
UpdateActiveConfig();
}
@@ -34,7 +34,7 @@ class Renderer : public ::Renderer
void BBoxWrite(int index, u16 value) override {}
TargetRectangle ConvertEFBRectangle(const EFBRectangle& rc) override;

void SwapImpl(AbstractTexture* texture, const EFBRectangle& rc, u64 ticks, float Gamma) override;
void SwapImpl(AbstractTexture* texture, const EFBRectangle& rc, u64 ticks) override;

void ClearScreen(const EFBRectangle& rc, bool colorEnable, bool alphaEnable, bool zEnable,
u32 color, u32 z) override
@@ -27,12 +27,15 @@ class TextureCache : public TextureCacheBase

void CopyEFB(u8* dst, const EFBCopyParams& params, u32 native_width, u32 bytes_per_row,
u32 num_blocks_y, u32 memory_stride, const EFBRectangle& src_rect,
bool scale_by_half) override
bool scale_by_half, float y_scale, float gamma, bool clamp_top, bool clamp_bottom,
const CopyFilterCoefficientArray& filter_coefficients) override
{
}

void CopyEFBToCacheEntry(TCacheEntry* entry, bool is_depth_copy, const EFBRectangle& src_rect,
bool scale_by_half, EFBCopyFormat dst_format, bool is_intensity) override
bool scale_by_half, EFBCopyFormat dst_format, bool is_intensity,
float gamma, bool clamp_top, bool clamp_bottom,
const CopyFilterCoefficientArray& filter_coefficients) override
{
}
};
@@ -1365,8 +1365,7 @@ void Renderer::ApplyBlendingState(const BlendingState state, bool force)
}

// This function has the final picture. We adjust the aspect ratio here.
void Renderer::SwapImpl(AbstractTexture* texture, const EFBRectangle& xfb_region, u64 ticks,
float Gamma)
void Renderer::SwapImpl(AbstractTexture* texture, const EFBRectangle& xfb_region, u64 ticks)
{
if (g_ogl_config.bSupportsDebug)
{
@@ -126,7 +126,7 @@ class Renderer : public ::Renderer

TargetRectangle ConvertEFBRectangle(const EFBRectangle& rc) override;

void SwapImpl(AbstractTexture* texture, const EFBRectangle& rc, u64 ticks, float Gamma) override;
void SwapImpl(AbstractTexture* texture, const EFBRectangle& rc, u64 ticks) override;

void ClearScreen(const EFBRectangle& rc, bool colorEnable, bool alphaEnable, bool zEnable,
u32 color, u32 z) override;
@@ -68,10 +68,18 @@ constexpr const char* geometry_program = "layout(triangles) in;\n"

void TextureCache::CopyEFB(u8* dst, const EFBCopyParams& params, u32 native_width,
u32 bytes_per_row, u32 num_blocks_y, u32 memory_stride,
const EFBRectangle& src_rect, bool scale_by_half)
const EFBRectangle& src_rect, bool scale_by_half, float y_scale,
float gamma, bool clamp_top, bool clamp_bottom,
const CopyFilterCoefficientArray& filter_coefficients)
{
// Flip top/bottom due to lower-left coordinate system.
float clamp_top_val =
clamp_bottom ? (1.0f - src_rect.bottom / static_cast<float>(EFB_HEIGHT)) : 0.0f;
float clamp_bottom_val =
clamp_top ? (1.0f - src_rect.top / static_cast<float>(EFB_HEIGHT)) : 0.0f;
TextureConverter::EncodeToRamFromTexture(dst, params, native_width, bytes_per_row, num_blocks_y,
memory_stride, src_rect, scale_by_half);
memory_stride, src_rect, scale_by_half, y_scale, gamma,
clamp_top_val, clamp_bottom_val, filter_coefficients);
}

TextureCache::TextureCache()
@@ -483,7 +491,9 @@ void TextureCache::DecodeTextureOnGPU(TCacheEntry* entry, u32 dst_level, const u

void TextureCache::CopyEFBToCacheEntry(TCacheEntry* entry, bool is_depth_copy,
const EFBRectangle& src_rect, bool scale_by_half,
EFBCopyFormat dst_format, bool is_intensity)
EFBCopyFormat dst_format, bool is_intensity, float gamma,
bool clamp_top, bool clamp_bottom,
const CopyFilterCoefficientArray& filter_coefficients)
{
auto* destination_texture = static_cast<OGLTexture*>(entry->texture.get());
g_renderer->ResetAPIState(); // reset any game specific settings
@@ -528,13 +538,27 @@ void TextureCache::CopyEFBToCacheEntry(TCacheEntry* entry, bool is_depth_copy,
code.GetBuffer(), geo_program);

shader.position_uniform = glGetUniformLocation(shader.shader.glprogid, "copy_position");
shader.pixel_height_uniform = glGetUniformLocation(shader.shader.glprogid, "pixel_height");
shader.gamma_rcp_uniform = glGetUniformLocation(shader.shader.glprogid, "gamma_rcp");
shader.clamp_tb_uniform = glGetUniformLocation(shader.shader.glprogid, "clamp_tb");
shader.filter_coefficients_uniform =
glGetUniformLocation(shader.shader.glprogid, "filter_coefficients");
}

shader.shader.Bind();

TargetRectangle R = g_renderer->ConvertEFBRectangle(src_rect);
glUniform4f(shader.position_uniform, static_cast<float>(R.left), static_cast<float>(R.top),
static_cast<float>(R.right), static_cast<float>(R.bottom));
glUniform1f(shader.pixel_height_uniform, g_ActiveConfig.bCopyEFBScaled ?
1.0f / g_renderer->GetTargetHeight() :
1.0f / EFB_HEIGHT);
glUniform1f(shader.gamma_rcp_uniform, 1.0f / gamma);
glUniform2f(shader.clamp_tb_uniform,
clamp_bottom ? (1.0f - src_rect.bottom / static_cast<float>(EFB_HEIGHT)) : 0.0f,
clamp_top ? (1.0f - src_rect.top / static_cast<float>(EFB_HEIGHT)) : 0.0f);
glUniform3f(shader.filter_coefficients_uniform, filter_coefficients[0] / 64.0f,
filter_coefficients[1] / 64.0f, filter_coefficients[2] / 64.0f);

ProgramShaderCache::BindVertexFormat(nullptr);
glDrawArrays(GL_TRIANGLE_STRIP, 0, 4);
@@ -65,11 +65,13 @@ class TextureCache : public TextureCacheBase

void CopyEFB(u8* dst, const EFBCopyParams& params, u32 native_width, u32 bytes_per_row,
u32 num_blocks_y, u32 memory_stride, const EFBRectangle& src_rect,
bool scale_by_half) override;
bool scale_by_half, float y_scale, float gamma, bool clamp_top, bool clamp_bottom,
const CopyFilterCoefficientArray& filter_coefficients) override;

void CopyEFBToCacheEntry(TCacheEntry* entry, bool is_depth_copy, const EFBRectangle& src_rect,
bool scale_by_half, EFBCopyFormat dst_format,
bool is_intensity) override;
bool scale_by_half, EFBCopyFormat dst_format, bool is_intensity,
float gamma, bool clamp_top, bool clamp_bottom,
const CopyFilterCoefficientArray& filter_coefficients) override;

bool CompileShaders() override;
void DeleteShaders() override;
@@ -84,6 +86,10 @@ class TextureCache : public TextureCacheBase
{
SHADER shader;
GLuint position_uniform;
GLuint pixel_height_uniform;
GLuint gamma_rcp_uniform;
GLuint clamp_tb_uniform;
GLuint filter_coefficients_uniform;
};

std::map<TextureConversionShaderGen::TCShaderUid, EFBCopyShader> m_efb_copy_programs;
@@ -39,6 +39,9 @@ struct EncodingProgram
SHADER program;
GLint copy_position_uniform;
GLint y_scale_uniform;
GLint gamma_rcp_uniform;
GLint clamp_tb_uniform;
GLint filter_coefficients_uniform;
};

std::map<EFBCopyParams, EncodingProgram> s_encoding_programs;
@@ -81,6 +84,10 @@ static EncodingProgram& GetOrCreateEncodingShader(const EFBCopyParams& params)

program.copy_position_uniform = glGetUniformLocation(program.program.glprogid, "position");
program.y_scale_uniform = glGetUniformLocation(program.program.glprogid, "y_scale");
program.gamma_rcp_uniform = glGetUniformLocation(program.program.glprogid, "gamma_rcp");
program.clamp_tb_uniform = glGetUniformLocation(program.program.glprogid, "clamp_tb");
program.filter_coefficients_uniform =
glGetUniformLocation(program.program.glprogid, "filter_coefficients");
return s_encoding_programs.emplace(params, program).first->second;
}

@@ -137,7 +144,9 @@ static void EncodeToRamUsingShader(GLuint srcTexture, u8* destAddr, u32 dst_line

void EncodeToRamFromTexture(u8* dest_ptr, const EFBCopyParams& params, u32 native_width,
u32 bytes_per_row, u32 num_blocks_y, u32 memory_stride,
const EFBRectangle& src_rect, bool scale_by_half)
const EFBRectangle& src_rect, bool scale_by_half, float y_scale,
float gamma, float clamp_top, float clamp_bottom,
const TextureCacheBase::CopyFilterCoefficientArray& filter_coefficients)
{
g_renderer->ResetAPIState();

@@ -146,14 +155,18 @@ void EncodeToRamFromTexture(u8* dest_ptr, const EFBCopyParams& params, u32 nativ
texconv_shader.program.Bind();
glUniform4i(texconv_shader.copy_position_uniform, src_rect.left, src_rect.top, native_width,
scale_by_half ? 2 : 1);
glUniform1f(texconv_shader.y_scale_uniform, params.y_scale);
glUniform1f(texconv_shader.y_scale_uniform, y_scale);
glUniform1f(texconv_shader.gamma_rcp_uniform, 1.0f / gamma);
glUniform2f(texconv_shader.clamp_tb_uniform, clamp_top, clamp_bottom);
glUniform3i(texconv_shader.filter_coefficients_uniform, filter_coefficients[0],
filter_coefficients[1], filter_coefficients[2]);

const GLuint read_texture = params.depth ?
FramebufferManager::ResolveAndGetDepthTarget(src_rect) :
FramebufferManager::ResolveAndGetRenderTarget(src_rect);

EncodeToRamUsingShader(read_texture, dest_ptr, bytes_per_row, num_blocks_y, memory_stride,
scale_by_half && !params.depth, params.y_scale);
scale_by_half && !params.depth, y_scale);

g_renderer->RestoreAPIState();
}
@@ -7,10 +7,9 @@
#include "Common/CommonTypes.h"
#include "Common/GL/GLUtil.h"

#include "VideoCommon/TextureCacheBase.h"
#include "VideoCommon/VideoCommon.h"

struct EFBCopyParams;

namespace OGL
{
// Converts textures between formats using shaders
@@ -21,9 +20,11 @@ void Init();
void Shutdown();

// returns size of the encoded data (in bytes)
void EncodeToRamFromTexture(u8* dest_ptr, const EFBCopyParams& params, u32 native_width,
u32 bytes_per_row, u32 num_blocks_y, u32 memory_stride,
const EFBRectangle& src_rect, bool scale_by_half);
void EncodeToRamFromTexture(
u8* dest_ptr, const EFBCopyParams& params, u32 native_width, u32 bytes_per_row,
u32 num_blocks_y, u32 memory_stride, const EFBRectangle& src_rect, bool scale_by_half,
float y_scale, float gamma, float clamp_top, float clamp_bottom,
const TextureCacheBase::CopyFilterCoefficientArray& filter_coefficients);
}

} // namespace OGL
@@ -469,10 +469,59 @@ u32 GetColor(u16 x, u16 y)
return GetPixelColor(offset);
}

static u32 VerticalFilter(const std::array<u32, 3>& colors,
const std::array<u8, 7>& filterCoefficients)
{
u8 in_colors[3][4];
std::memcpy(&in_colors, colors.data(), sizeof(in_colors));

// Alpha channel is not used
u8 out_color[4];
out_color[ALP_C] = 0;

// All Coefficients should sum to 64, otherwise the total brightness will change, which many games
// do on purpose to implement a brightness filter across the whole copy.
for (int i = BLU_C; i <= RED_C; i++)
{
// TODO: implement support for multisampling.
// In non-multisampling mode:
// * Coefficients 2, 3 and 4 sample from the current pixel.
// * Coefficients 0 and 1 sample from the pixel above this one
// * Coefficients 5 and 6 sample from the pixel below this one
int sum =
in_colors[0][i] * (filterCoefficients[0] + filterCoefficients[1]) +
in_colors[1][i] * (filterCoefficients[2] + filterCoefficients[3] + filterCoefficients[4]) +
in_colors[2][i] * (filterCoefficients[5] + filterCoefficients[6]);

// TODO: this clamping behavior appears to be correct, but isn't confirmed on hardware.
out_color[i] = std::min(255, sum >> 6); // clamp larger values to 255
}

u32 out_color32;
std::memcpy(&out_color32, out_color, sizeof(out_color32));
return out_color32;
}

static u32 GammaCorrection(u32 color, const float gamma_rcp)
{
u8 in_colors[4];
std::memcpy(&in_colors, &color, sizeof(in_colors));

u8 out_color[4];
for (int i = BLU_C; i <= RED_C; i++)
{
out_color[i] = static_cast<u8>(
MathUtil::Clamp(std::pow(in_colors[i] / 255.0f, gamma_rcp) * 255.0f, 0.0f, 255.0f));
}

u32 out_color32;
std::memcpy(&out_color32, out_color, sizeof(out_color32));
return out_color32;
}

// For internal used only, return a non-normalized value, which saves work later.
yuv444 GetColorYUV(u16 x, u16 y)
static yuv444 ConvertColorToYUV(u32 color)
{
const u32 color = GetColor(x, y);
const u8 red = static_cast<u8>(color >> 24);
const u8 green = static_cast<u8>(color >> 16);
const u8 blue = static_cast<u8>(color >> 8);
@@ -497,16 +546,21 @@ u8* GetPixelPointer(u16 x, u16 y, bool depth)
return &efb[GetColorOffset(x, y)];
}

void EncodeXFB(u8* xfb_in_ram, u32 memory_stride, const EFBRectangle& source_rect, float y_scale)
void EncodeXFB(u8* xfb_in_ram, u32 memory_stride, const EFBRectangle& source_rect, float y_scale,
float gamma)
{
if (!xfb_in_ram)
{
WARN_LOG(VIDEO, "Tried to copy to invalid XFB address");
return;
}

int left = source_rect.left;
int right = source_rect.right;
const int left = source_rect.left;
const int right = source_rect.right;
const bool clamp_top = bpmem.triggerEFBCopy.clamp_top;
const bool clamp_bottom = bpmem.triggerEFBCopy.clamp_bottom;
const float gamma_rcp = 1.0f / gamma;
const auto filter_coefficients = bpmem.copyfilter.GetCoefficients();

// this assumes copies will always start on an even (YU) pixel and the
// copy always has an even width, which might not be true.
@@ -523,13 +577,30 @@ void EncodeXFB(u8* xfb_in_ram, u32 memory_stride, const EFBRectangle& source_rec
source.resize(EFB_WIDTH * EFB_HEIGHT);
yuv422_packed* src_ptr = &source[0];

for (float y = source_rect.top; y < source_rect.bottom; y++)
for (int y = source_rect.top; y < source_rect.bottom; y++)
{
// Get a scanline of YUV pixels in 4:4:4 format
// Clamping behavior
// NOTE: when the clamp bits aren't set, the hardware will happily read beyond the EFB,
// which returns random garbage from the empty bus (confirmed by hardware tests).
//
// In our implementation, the garbage just so happens to be the top or bottom row.
// Statistically, that could happen.
u16 y_prev = static_cast<u16>(std::max(clamp_top ? source_rect.top : 0, y - 1));
u16 y_next = static_cast<u16>(std::min(clamp_bottom ? source_rect.bottom : EFB_HEIGHT, y + 1));

// Get a scanline of YUV pixels in 4:4:4 format
for (int i = 1, x = left; x < right; i++, x++)
{
scanline[i] = GetColorYUV(x, y);
// Get RGB colors
std::array<u32, 3> colors = {{GetColor(x, y_prev), GetColor(x, y), GetColor(x, y_next)}};

// Vertical Filter (Multisampling resolve, deflicker, brightness)
u32 filtered = VerticalFilter(colors, filter_coefficients);

// Gamma correction happens here.
filtered = GammaCorrection(filtered, gamma_rcp);

scanline[i] = ConvertColorToYUV(filtered);
}

// Flipper clamps the border colors
@@ -549,7 +620,7 @@ void EncodeXFB(u8* xfb_in_ram, u32 memory_stride, const EFBRectangle& source_rec
src_ptr[x + 1].Y = scanline[i + 1].Y + 16;
// V[i] = 1/4 * V[i-1] + 1/2 * V[i] + 1/4 * V[i+1]
src_ptr[x + 1].UV =
128 + ((scanline[i].V + (scanline[i + 1].V << 1) + scanline[i + 2].V) >> 2);
128 + ((scanline[i - 1].V + (scanline[i].V << 1) + scanline[i + 1].V) >> 2);
}
src_ptr += memory_stride;
}
@@ -4,6 +4,8 @@

#pragma once

#include <array>

#include "Common/CommonTypes.h"
#include "VideoCommon/PerfQueryBase.h"
#include "VideoCommon/VideoCommon.h"
@@ -52,12 +54,12 @@ void SetColor(u16 x, u16 y, u8* color);
void SetDepth(u16 x, u16 y, u32 depth);

u32 GetColor(u16 x, u16 y);
yuv444 GetColorYUV(u16 x, u16 y);
u32 GetDepth(u16 x, u16 y);

u8* GetPixelPointer(u16 x, u16 y, bool depth);

void EncodeXFB(u8* xfb_in_ram, u32 memory_stride, const EFBRectangle& source_rect, float y_scale);
void EncodeXFB(u8* xfb_in_ram, u32 memory_stride, const EFBRectangle& source_rect, float y_scale,
float gamma);

extern u32 perf_values[PQ_NUM_MEMBERS];
inline void IncPerfCounterQuadCount(PerfQueryType type)
@@ -87,8 +87,7 @@ std::unique_ptr<AbstractPipeline> SWRenderer::CreatePipeline(const AbstractPipel
}

// Called on the GPU thread
void SWRenderer::SwapImpl(AbstractTexture* texture, const EFBRectangle& xfb_region, u64 ticks,
float Gamma)
void SWRenderer::SwapImpl(AbstractTexture* texture, const EFBRectangle& xfb_region, u64 ticks)
{
OSD::DoCallbacks(OSD::CallbackType::OnFrame);

@@ -34,7 +34,7 @@ class SWRenderer : public Renderer

TargetRectangle ConvertEFBRectangle(const EFBRectangle& rc) override;

void SwapImpl(AbstractTexture* texture, const EFBRectangle& rc, u64 ticks, float Gamma) override;
void SwapImpl(AbstractTexture* texture, const EFBRectangle& rc, u64 ticks) override;

void ClearScreen(const EFBRectangle& rc, bool colorEnable, bool alphaEnable, bool zEnable,
u32 color, u32 z) override;
@@ -18,15 +18,18 @@ class TextureCache : public TextureCacheBase
}
void CopyEFB(u8* dst, const EFBCopyParams& params, u32 native_width, u32 bytes_per_row,
u32 num_blocks_y, u32 memory_stride, const EFBRectangle& src_rect,
bool scale_by_half) override
bool scale_by_half, float y_scale, float gamma, bool clamp_top, bool clamp_bottom,
const CopyFilterCoefficientArray& filter_coefficients) override
{
TextureEncoder::Encode(dst, params, native_width, bytes_per_row, num_blocks_y, memory_stride,
src_rect, scale_by_half);
src_rect, scale_by_half, y_scale, gamma);
}

private:
void CopyEFBToCacheEntry(TCacheEntry* entry, bool is_depth_copy, const EFBRectangle& src_rect,
bool scale_by_half, EFBCopyFormat dst_format, bool is_intensity) override
bool scale_by_half, EFBCopyFormat dst_format, bool is_intensity,
float gamma, bool clamp_top, bool clamp_bottom,
const CopyFilterCoefficientArray& filter_coefficients) override
{
// TODO: If we ever want to "fake" vram textures, we would need to implement this
}
@@ -1469,11 +1469,12 @@ void EncodeEfbCopy(u8* dst, const EFBCopyParams& params, u32 native_width, u32 b
}

void Encode(u8* dst, const EFBCopyParams& params, u32 native_width, u32 bytes_per_row,
u32 num_blocks_y, u32 memory_stride, const EFBRectangle& src_rect, bool scale_by_half)
u32 num_blocks_y, u32 memory_stride, const EFBRectangle& src_rect, bool scale_by_half,
float y_scale, float gamma)
{
if (params.copy_format == EFBCopyFormat::XFB)
{
EfbInterface::EncodeXFB(dst, native_width, src_rect, params.y_scale);
EfbInterface::EncodeXFB(dst, native_width, src_rect, y_scale, gamma);
}
else
{
@@ -5,12 +5,12 @@
#pragma once

#include "Common/CommonTypes.h"
#include "VideoCommon/TextureCacheBase.h"
#include "VideoCommon/VideoCommon.h"

struct EFBCopyParams;

namespace TextureEncoder
{
void Encode(u8* dst, const EFBCopyParams& params, u32 native_width, u32 bytes_per_row,
u32 num_blocks_y, u32 memory_stride, const EFBRectangle& src_rect, bool scale_by_half);
u32 num_blocks_y, u32 memory_stride, const EFBRectangle& src_rect, bool scale_by_half,
float y_scale, float gamma);
}
@@ -697,8 +697,7 @@ void Renderer::ReinterpretPixelData(unsigned int convtype)
BindEFBToStateTracker();
}

void Renderer::SwapImpl(AbstractTexture* texture, const EFBRectangle& xfb_region, u64 ticks,
float Gamma)
void Renderer::SwapImpl(AbstractTexture* texture, const EFBRectangle& xfb_region, u64 ticks)
{
// Pending/batched EFB pokes should be included in the final image.
FramebufferManager::GetInstance()->FlushEFBPokes();
@@ -59,7 +59,7 @@ class Renderer : public ::Renderer
void BBoxWrite(int index, u16 value) override;
TargetRectangle ConvertEFBRectangle(const EFBRectangle& rc) override;

void SwapImpl(AbstractTexture* texture, const EFBRectangle& rc, u64 ticks, float Gamma) override;
void SwapImpl(AbstractTexture* texture, const EFBRectangle& rc, u64 ticks) override;

void ClearScreen(const EFBRectangle& rc, bool color_enable, bool alpha_enable, bool z_enable,
u32 color, u32 z) override;
@@ -100,7 +100,9 @@ void TextureCache::ConvertTexture(TCacheEntry* destination, TCacheEntry* source,

void TextureCache::CopyEFB(u8* dst, const EFBCopyParams& params, u32 native_width,
u32 bytes_per_row, u32 num_blocks_y, u32 memory_stride,
const EFBRectangle& src_rect, bool scale_by_half)
const EFBRectangle& src_rect, bool scale_by_half, float y_scale,
float gamma, bool clamp_top, bool clamp_bottom,
const CopyFilterCoefficientArray& filter_coefficients)
{
// Flush EFB pokes first, as they're expected to be included.
FramebufferManager::GetInstance()->FlushEFBPokes();
@@ -131,9 +133,9 @@ void TextureCache::CopyEFB(u8* dst, const EFBCopyParams& params, u32 native_widt
src_texture->TransitionToLayout(g_command_buffer_mgr->GetCurrentCommandBuffer(),
VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL);

m_texture_converter->EncodeTextureToMemory(src_texture->GetView(), dst, params, native_width,
bytes_per_row, num_blocks_y, memory_stride, src_rect,
scale_by_half);
m_texture_converter->EncodeTextureToMemory(
src_texture->GetView(), dst, params, native_width, bytes_per_row, num_blocks_y, memory_stride,
src_rect, scale_by_half, y_scale, gamma, clamp_top, clamp_bottom, filter_coefficients);

// Transition back to original state
src_texture->TransitionToLayout(g_command_buffer_mgr->GetCurrentCommandBuffer(), original_layout);
@@ -209,7 +211,9 @@ void TextureCache::DeleteShaders()

void TextureCache::CopyEFBToCacheEntry(TCacheEntry* entry, bool is_depth_copy,
const EFBRectangle& src_rect, bool scale_by_half,
EFBCopyFormat dst_format, bool is_intensity)
EFBCopyFormat dst_format, bool is_intensity, float gamma,
bool clamp_top, bool clamp_bottom,
const CopyFilterCoefficientArray& filter_coefficients)
{
VKTexture* texture = static_cast<VKTexture*>(entry->texture.get());

@@ -228,6 +232,26 @@ void TextureCache::CopyEFBToCacheEntry(TCacheEntry* entry, bool is_depth_copy,
VkCommandBuffer command_buffer = g_command_buffer_mgr->GetCurrentCommandBuffer();
StateTracker::GetInstance()->EndRenderPass();

// Fill uniform buffer.
struct PixelUniforms
{
float filter_coefficients[3];
float gamma_rcp;
float clamp_top;
float clamp_bottom;
float pixel_height;
u32 padding;
};
PixelUniforms uniforms;
for (size_t i = 0; i < filter_coefficients.size(); i++)
uniforms.filter_coefficients[i] = filter_coefficients[i] / 64.0f;
uniforms.gamma_rcp = 1.0f / gamma;
uniforms.clamp_top = clamp_top ? src_rect.top / float(EFB_HEIGHT) : 0.0f;
uniforms.clamp_bottom = clamp_bottom ? src_rect.bottom / float(EFB_HEIGHT) : 1.0f;
uniforms.pixel_height =
g_ActiveConfig.bCopyEFBScaled ? 1.0f / g_renderer->GetTargetHeight() : 1.0f / EFB_HEIGHT;
uniforms.padding = 0;

// Transition EFB to shader resource before binding.
// An out-of-bounds source region is valid here, and fine for the draw (since it is converted
// to texture coordinates), but it's not valid to resolve an out-of-range rectangle.
@@ -274,6 +298,10 @@ void TextureCache::CopyEFBToCacheEntry(TCacheEntry* entry, bool is_depth_copy,
g_shader_cache->GetPassthroughVertexShader(),
g_shader_cache->GetPassthroughGeometryShader(), shader);

u8* ubo_ptr = draw.AllocatePSUniforms(sizeof(PixelUniforms));
std::memcpy(ubo_ptr, &uniforms, sizeof(PixelUniforms));
draw.CommitPSUniforms(sizeof(PixelUniforms));

draw.SetPSSampler(0, src_texture->GetView(), src_sampler);

VkRect2D dest_region = {{0, 0}, {texture->GetConfig().width, texture->GetConfig().height}};
@@ -38,7 +38,8 @@ class TextureCache : public TextureCacheBase

void CopyEFB(u8* dst, const EFBCopyParams& params, u32 native_width, u32 bytes_per_row,
u32 num_blocks_y, u32 memory_stride, const EFBRectangle& src_rect,
bool scale_by_half) override;
bool scale_by_half, float y_scale, float gamma, bool clamp_top, bool clamp_bottom,
const CopyFilterCoefficientArray& filter_coefficients) override;

bool SupportsGPUTextureDecode(TextureFormat format, TLUTFormat palette_format) override;

@@ -52,8 +53,9 @@ class TextureCache : public TextureCacheBase

private:
void CopyEFBToCacheEntry(TCacheEntry* entry, bool is_depth_copy, const EFBRectangle& src_rect,
bool scale_by_half, EFBCopyFormat dst_format,
bool is_intensity) override;
bool scale_by_half, EFBCopyFormat dst_format, bool is_intensity,
float gamma, bool clamp_top, bool clamp_bottom,
const CopyFilterCoefficientArray& filter_coefficients) override;

std::unique_ptr<StreamBuffer> m_texture_upload_buffer;

@@ -38,6 +38,11 @@ struct EFBEncodeParams
{
std::array<s32, 4> position_uniform;
float y_scale;
float gamma_rcp;
float clamp_top;
float clamp_bottom;
s32 filter_coefficients[3];
u32 padding;
};
}
TextureConverter::TextureConverter()
@@ -201,10 +206,11 @@ void TextureConverter::ConvertTexture(TextureCacheBase::TCacheEntry* dst_entry,
draw.EndRenderPass();
}

void TextureConverter::EncodeTextureToMemory(VkImageView src_texture, u8* dest_ptr,
const EFBCopyParams& params, u32 native_width,
u32 bytes_per_row, u32 num_blocks_y, u32 memory_stride,
const EFBRectangle& src_rect, bool scale_by_half)
void TextureConverter::EncodeTextureToMemory(
VkImageView src_texture, u8* dest_ptr, const EFBCopyParams& params, u32 native_width,
u32 bytes_per_row, u32 num_blocks_y, u32 memory_stride, const EFBRectangle& src_rect,
bool scale_by_half, float y_scale, float gamma, bool clamp_top, bool clamp_bottom,
const TextureCacheBase::CopyFilterCoefficientArray& filter_coefficients)
{
VkShaderModule shader = GetEncodingShader(params);
if (shader == VK_NULL_HANDLE)
@@ -236,14 +242,21 @@ void TextureConverter::EncodeTextureToMemory(VkImageView src_texture, u8* dest_p
encoder_params.position_uniform[1] = src_rect.top;
encoder_params.position_uniform[2] = static_cast<s32>(native_width);
encoder_params.position_uniform[3] = scale_by_half ? 2 : 1;
encoder_params.y_scale = params.y_scale;
draw.SetPushConstants(&encoder_params, sizeof(encoder_params));
encoder_params.y_scale = y_scale;
encoder_params.gamma_rcp = 1.0f / gamma;
encoder_params.clamp_top = clamp_top ? src_rect.top / float(EFB_HEIGHT) : 0.0f;
encoder_params.clamp_bottom = clamp_bottom ? src_rect.bottom / float(EFB_HEIGHT) : 0.0f;
for (size_t i = 0; i < filter_coefficients.size(); i++)
encoder_params.filter_coefficients[i] = filter_coefficients[i];
u8* ubo_ptr = draw.AllocatePSUniforms(sizeof(EFBEncodeParams));
std::memcpy(ubo_ptr, &encoder_params, sizeof(EFBEncodeParams));
draw.CommitPSUniforms(sizeof(EFBEncodeParams));

// We also linear filtering for both box filtering and downsampling higher resolutions to 1x
// TODO: This only produces perfect downsampling for 2x IR, other resolutions will need more
// complex down filtering to average all pixels and produce the correct result.
bool linear_filter =
(scale_by_half && !params.depth) || g_renderer->GetEFBScale() != 1 || params.y_scale > 1.0f;
(scale_by_half && !params.depth) || g_renderer->GetEFBScale() != 1 || y_scale > 1.0f;
draw.SetPSSampler(0, src_texture,
linear_filter ? g_object_cache->GetLinearSampler() :
g_object_cache->GetPointSampler());
@@ -40,9 +40,12 @@ class TextureConverter

// Uses an encoding shader to copy src_texture to dest_ptr.
// NOTE: Executes the current command buffer.
void EncodeTextureToMemory(VkImageView src_texture, u8* dest_ptr, const EFBCopyParams& params,
u32 native_width, u32 bytes_per_row, u32 num_blocks_y,
u32 memory_stride, const EFBRectangle& src_rect, bool scale_by_half);
void
EncodeTextureToMemory(VkImageView src_texture, u8* dest_ptr, const EFBCopyParams& params,
u32 native_width, u32 bytes_per_row, u32 num_blocks_y, u32 memory_stride,
const EFBRectangle& src_rect, bool scale_by_half, float y_scale,
float gamma, bool clamp_top, bool clamp_bottom,
const TextureCacheBase::CopyFilterCoefficientArray& filter_coefficients);

bool SupportsTextureDecoding(TextureFormat format, TLUTFormat palette_format);
void DecodeTexture(VkCommandBuffer command_buffer, TextureCache::TCacheEntry* entry,
@@ -4,6 +4,7 @@

#pragma once

#include <array>
#include <string>

#include "Common/BitField.h"
@@ -944,8 +945,8 @@ union UPE_Copy
{
u32 Hex;

BitField<0, 1, u32> clamp0; // if set clamp top
BitField<1, 1, u32> clamp1; // if set clamp bottom
BitField<0, 1, u32> clamp_top; // if set clamp top
BitField<1, 1, u32> clamp_bottom; // if set clamp bottom
BitField<2, 1, u32> yuv; // if set, color conversion from RGB to YUV
BitField<3, 4, u32> target_pixel_format; // realformat is (fmt/2)+((fmt&1)*8).... for some reason
// the msb is the lsb (pattern: cycling right shift)
@@ -967,6 +968,29 @@ union UPE_Copy
}
};

union CopyFilterCoefficients
{
using Values = std::array<u8, 7>;

u64 Hex;

BitField<0, 6, u64> w0;
BitField<6, 6, u64> w1;
BitField<12, 6, u64> w2;
BitField<18, 6, u64> w3;
BitField<32, 6, u64> w4;
BitField<38, 6, u64> w5;
BitField<44, 6, u64> w6;

Values GetCoefficients() const
{
return {
static_cast<u8>(w0), static_cast<u8>(w1), static_cast<u8>(w2), static_cast<u8>(w3),
static_cast<u8>(w4), static_cast<u8>(w5), static_cast<u8>(w6),
};
}
};

union BPU_PreloadTileInfo
{
u32 hex;
@@ -1036,29 +1060,29 @@ struct BPMemory
// 2 channel, 16 when dest is RGBA
// also, doubles whenever mipmap box filter option is set (excent on RGBA). Probably to do with
// number of bytes to look at when smoothing
u32 dispcopyyscale; // 4e
u32 clearcolorAR; // 4f
u32 clearcolorGB; // 50
u32 clearZValue; // 51
UPE_Copy triggerEFBCopy; // 52
u32 copyfilter[2]; // 53,54
u32 boundbox0; // 55
u32 boundbox1; // 56
u32 unknown7[2]; // 57,58
X10Y10 scissorOffset; // 59
u32 unknown8[6]; // 5a,5b,5c,5d, 5e,5f
BPS_TmemConfig tmem_config; // 60-66
u32 metric; // 67
FieldMode fieldmode; // 68
u32 unknown10[7]; // 69-6F
u32 unknown11[16]; // 70-7F
FourTexUnits tex[2]; // 80-bf
TevStageCombiner combiners[16]; // 0xC0-0xDF
TevReg tevregs[4]; // 0xE0
FogRangeParams fogRange; // 0xE8
FogParams fog; // 0xEE,0xEF,0xF0,0xF1,0xF2
AlphaTest alpha_test; // 0xF3
ZTex1 ztex1; // 0xf4,0xf5
u32 dispcopyyscale; // 4e
u32 clearcolorAR; // 4f
u32 clearcolorGB; // 50
u32 clearZValue; // 51
UPE_Copy triggerEFBCopy; // 52
CopyFilterCoefficients copyfilter; // 53,54
u32 boundbox0; // 55
u32 boundbox1; // 56
u32 unknown7[2]; // 57,58
X10Y10 scissorOffset; // 59
u32 unknown8[6]; // 5a,5b,5c,5d, 5e,5f
BPS_TmemConfig tmem_config; // 60-66
u32 metric; // 67
FieldMode fieldmode; // 68
u32 unknown10[7]; // 69-6F
u32 unknown11[16]; // 70-7F
FourTexUnits tex[2]; // 80-bf
TevStageCombiner combiners[16]; // 0xC0-0xDF
TevReg tevregs[4]; // 0xE0
FogRangeParams fogRange; // 0xE8
FogParams fog; // 0xEE,0xEF,0xF0,0xF1,0xF2
AlphaTest alpha_test; // 0xF3
ZTex1 ztex1; // 0xf4,0xf5
ZTex2 ztex2;
TevKSel tevksel[8]; // 0xf6,0xf7,f8,f9,fa,fb,fc,fd
u32 bpMask; // 0xFE
@@ -229,10 +229,13 @@ static void BPWritten(const BPCmd& bp)
{
// bpmem.zcontrol.pixel_format to PEControl::Z24 is when the game wants to copy from ZBuffer
// (Zbuffer uses 24-bit Format)
static constexpr CopyFilterCoefficients::Values filter_coefficients = {
{0, 0, 21, 22, 21, 0, 0}};
bool is_depth_copy = bpmem.zcontrol.pixel_format == PEControl::Z24;
g_texture_cache->CopyRenderTargetToTexture(
destAddr, PE_copy.tp_realFormat(), srcRect.GetWidth(), srcRect.GetHeight(), destStride,
is_depth_copy, srcRect, !!PE_copy.intensity_fmt, !!PE_copy.half_scale, 1.0f, 1.0f);
is_depth_copy, srcRect, !!PE_copy.intensity_fmt, !!PE_copy.half_scale, 1.0f, 1.0f,
bpmem.triggerEFBCopy.clamp_top, bpmem.triggerEFBCopy.clamp_bottom, filter_coefficients);
}
else
{
@@ -260,9 +263,10 @@ static void BPWritten(const BPCmd& bp)
bpmem.copyTexSrcWH.x + 1, destStride, height, yScale);

bool is_depth_copy = bpmem.zcontrol.pixel_format == PEControl::Z24;
g_texture_cache->CopyRenderTargetToTexture(destAddr, EFBCopyFormat::XFB, srcRect.GetWidth(),
height, destStride, is_depth_copy, srcRect, false,
false, yScale, s_gammaLUT[PE_copy.gamma]);
g_texture_cache->CopyRenderTargetToTexture(
destAddr, EFBCopyFormat::XFB, srcRect.GetWidth(), height, destStride, is_depth_copy,
srcRect, false, false, yScale, s_gammaLUT[PE_copy.gamma], bpmem.triggerEFBCopy.clamp_top,
bpmem.triggerEFBCopy.clamp_bottom, bpmem.copyfilter.GetCoefficients());

// This stays in to signal end of a "frame"
g_renderer->RenderToXFB(destAddr, srcRect, destStride, height, s_gammaLUT[PE_copy.gamma]);
@@ -1015,9 +1019,9 @@ void GetBPRegInfo(const u8* data, std::string* name, std::string* desc)
"Copy to XFB: %s\n"
"Intensity format: %s\n"
"Automatic color conversion: %s",
(copy.clamp0 && copy.clamp1) ?
(copy.clamp_top && copy.clamp_bottom) ?
"Top and Bottom" :
(copy.clamp0) ? "Top only" : (copy.clamp1) ? "Bottom only" : "None",
(copy.clamp_top) ? "Top only" : (copy.clamp_bottom) ? "Bottom only" : "None",
no_yes[copy.yuv], static_cast<int>(copy.tp_realFormat()),
(copy.gamma == 0) ?
"1.0" :
@@ -680,7 +680,7 @@ void Renderer::Swap(u32 xfbAddr, u32 fbWidth, u32 fbStride, u32 fbHeight, const
// TODO: merge more generic parts into VideoCommon
{
std::lock_guard<std::mutex> guard(m_swap_mutex);
g_renderer->SwapImpl(xfb_entry->texture.get(), xfb_rect, ticks, xfb_entry->gamma);
g_renderer->SwapImpl(xfb_entry->texture.get(), xfb_rect, ticks);
}

// Update the window size based on the frame that was just rendered.
@@ -175,8 +175,7 @@ class Renderer
// Finish up the current frame, print some stats
void Swap(u32 xfbAddr, u32 fbWidth, u32 fbStride, u32 fbHeight, const EFBRectangle& rc,
u64 ticks);
virtual void SwapImpl(AbstractTexture* texture, const EFBRectangle& rc, u64 ticks,
float Gamma = 1.0f) = 0;
virtual void SwapImpl(AbstractTexture* texture, const EFBRectangle& rc, u64 ticks) = 0;

PEControl::PixelFormat GetPrevPixelFormat() const { return m_prev_efb_format; }
void StorePixelFormat(PEControl::PixelFormat new_format) { m_prev_efb_format = new_format; }
@@ -1499,10 +1499,39 @@ void TextureCacheBase::LoadTextureLevelZeroFromMemory(TCacheEntry* entry_to_upda
}
}

void TextureCacheBase::CopyRenderTargetToTexture(u32 dstAddr, EFBCopyFormat dstFormat, u32 width,
u32 height, u32 dstStride, bool is_depth_copy,
const EFBRectangle& srcRect, bool isIntensity,
bool scaleByHalf, float y_scale, float gamma)
TextureCacheBase::CopyFilterCoefficientArray
TextureCacheBase::GetRAMCopyFilterCoefficients(const CopyFilterCoefficients::Values& coefficients)
{
// To simplify the backend, we precalculate the three coefficients in common. Coefficients 0, 1
// are for the row above, 2, 3, 4 are for the current pixel, and 5, 6 are for the row below.
return {static_cast<u32>(coefficients[0]) + static_cast<u32>(coefficients[1]),
static_cast<u32>(coefficients[2]) + static_cast<u32>(coefficients[3]) +
static_cast<u32>(coefficients[4]),
static_cast<u32>(coefficients[5]) + static_cast<u32>(coefficients[6])};
}

TextureCacheBase::CopyFilterCoefficientArray
TextureCacheBase::GetVRAMCopyFilterCoefficients(const CopyFilterCoefficients::Values& coefficients)
{
// If the user disables the copy filter, only apply it to the VRAM copy.
// This way games which are sensitive to changes to the RAM copy of the XFB will be unaffected.
CopyFilterCoefficientArray res = GetRAMCopyFilterCoefficients(coefficients);
if (!g_ActiveConfig.bDisableCopyFilter)
return res;

// Disabling the copy filter in options should not ignore the values the game sets completely,
// as some games use the filter coefficients to control the brightness of the screen. Instead,
// add all coefficients to the middle sample, so the deflicker/vertical filter has no effect.
res[1] += res[0] + res[2];
res[0] = 0;
res[2] = 0;
return res;
}

void TextureCacheBase::CopyRenderTargetToTexture(
u32 dstAddr, EFBCopyFormat dstFormat, u32 width, u32 height, u32 dstStride, bool is_depth_copy,
const EFBRectangle& srcRect, bool isIntensity, bool scaleByHalf, float y_scale, float gamma,
bool clamp_top, bool clamp_bottom, const CopyFilterCoefficients::Values& filter_coefficients)
{
// Emulation methods:
//
@@ -1622,8 +1651,10 @@ void TextureCacheBase::CopyRenderTargetToTexture(u32 dstAddr, EFBCopyFormat dstF
if (copy_to_ram)
{
PEControl::PixelFormat srcFormat = bpmem.zcontrol.pixel_format;
EFBCopyParams format(srcFormat, dstFormat, is_depth_copy, isIntensity, y_scale);
CopyEFB(dst, format, tex_w, bytes_per_row, num_blocks_y, dstStride, srcRect, scaleByHalf);
EFBCopyParams format(srcFormat, dstFormat, is_depth_copy, isIntensity);
CopyEFB(dst, format, tex_w, bytes_per_row, num_blocks_y, dstStride, srcRect, scaleByHalf,
y_scale, gamma, clamp_top, clamp_bottom,
GetRAMCopyFilterCoefficients(filter_coefficients));
}
else
{
@@ -1742,8 +1773,6 @@ void TextureCacheBase::CopyRenderTargetToTexture(u32 dstAddr, EFBCopyFormat dstF
{
entry->SetGeneralParameters(dstAddr, 0, baseFormat, is_xfb_copy);
entry->SetDimensions(tex_w, tex_h, 1);
entry->gamma = gamma;

entry->frameCount = FRAMECOUNT_INVALID;
if (is_xfb_copy)
{
@@ -1757,7 +1786,9 @@ void TextureCacheBase::CopyRenderTargetToTexture(u32 dstAddr, EFBCopyFormat dstF
entry->may_have_overlapping_textures = false;
entry->is_custom_tex = false;

CopyEFBToCacheEntry(entry, is_depth_copy, srcRect, scaleByHalf, dstFormat, isIntensity);
CopyEFBToCacheEntry(entry, is_depth_copy, srcRect, scaleByHalf, dstFormat, isIntensity, gamma,
clamp_top, clamp_bottom,
GetVRAMCopyFilterCoefficients(filter_coefficients));

u64 hash = entry->CalculateHash();
entry->SetHashes(hash, hash);
@@ -47,23 +47,21 @@ struct TextureAndTLUTFormat
struct EFBCopyParams
{
EFBCopyParams(PEControl::PixelFormat efb_format_, EFBCopyFormat copy_format_, bool depth_,
bool yuv_, float y_scale_)
: efb_format(efb_format_), copy_format(copy_format_), depth(depth_), yuv(yuv_),
y_scale(y_scale_)
bool yuv_)
: efb_format(efb_format_), copy_format(copy_format_), depth(depth_), yuv(yuv_)
{
}

bool operator<(const EFBCopyParams& rhs) const
{
return std::tie(efb_format, copy_format, depth, yuv, y_scale) <
std::tie(rhs.efb_format, rhs.copy_format, rhs.depth, rhs.yuv, rhs.y_scale);
return std::tie(efb_format, copy_format, depth, yuv) <
std::tie(rhs.efb_format, rhs.copy_format, rhs.depth, rhs.yuv);
}

PEControl::PixelFormat efb_format;
EFBCopyFormat copy_format;
bool depth;
bool yuv;
float y_scale;
};

struct TextureLookupInformation
@@ -108,6 +106,8 @@ class TextureCacheBase
static const int FRAMECOUNT_INVALID = 0;

public:
using CopyFilterCoefficientArray = std::array<u32, 3>;

struct TCacheEntry
{
// common members
@@ -126,7 +126,6 @@ class TextureCacheBase
// content, aren't just downscaled
bool should_force_safe_hashing = false; // for XFB
bool is_xfb_copy = false;
float gamma = 1.0f;
u64 id;

bool reference_changed = false; // used by xfb to determine when a reference xfb changed
@@ -216,7 +215,9 @@ class TextureCacheBase

virtual void CopyEFB(u8* dst, const EFBCopyParams& params, u32 native_width, u32 bytes_per_row,
u32 num_blocks_y, u32 memory_stride, const EFBRectangle& src_rect,
bool scale_by_half) = 0;
bool scale_by_half, float y_scale, float gamma, bool clamp_top,
bool clamp_bottom,
const CopyFilterCoefficientArray& filter_coefficients) = 0;

virtual bool CompileShaders() = 0;
virtual void DeleteShaders() = 0;
@@ -248,7 +249,9 @@ class TextureCacheBase
virtual void BindTextures();
void CopyRenderTargetToTexture(u32 dstAddr, EFBCopyFormat dstFormat, u32 width, u32 height,
u32 dstStride, bool is_depth_copy, const EFBRectangle& srcRect,
bool isIntensity, bool scaleByHalf, float y_scale, float gamma);
bool isIntensity, bool scaleByHalf, float y_scale, float gamma,
bool clamp_top, bool clamp_bottom,
const CopyFilterCoefficients::Values& filter_coefficients);

virtual void ConvertTexture(TCacheEntry* entry, TCacheEntry* unconverted, const void* palette,
TLUTFormat format) = 0;
@@ -315,13 +318,21 @@ class TextureCacheBase

virtual void CopyEFBToCacheEntry(TCacheEntry* entry, bool is_depth_copy,
const EFBRectangle& src_rect, bool scale_by_half,
EFBCopyFormat dst_format, bool is_intensity) = 0;
EFBCopyFormat dst_format, bool is_intensity, float gamma,
bool clamp_top, bool clamp_bottom,
const CopyFilterCoefficientArray& filter_coefficients) = 0;

// Removes and unlinks texture from texture cache and returns it to the pool
TexAddrCache::iterator InvalidateTexture(TexAddrCache::iterator t_iter);

void UninitializeXFBMemory(u8* dst, u32 stride, u32 bytes_per_row, u32 num_blocks_y);

// Precomputing the coefficients for the previous, current, and next lines for the copy filter.
CopyFilterCoefficientArray
GetRAMCopyFilterCoefficients(const CopyFilterCoefficients::Values& coefficients);
CopyFilterCoefficientArray
GetVRAMCopyFilterCoefficients(const CopyFilterCoefficients::Values& coefficients);

TexAddrCache textures_by_address;
TexHashCache textures_by_hash;
TexPool texture_pool;

Large diffs are not rendered by default.

@@ -38,34 +38,66 @@ ShaderCode GenerateShader(APIType api_type, const UidData* uid_data)
if (api_type == APIType::OpenGL)
{
out.Write("SAMPLER_BINDING(9) uniform sampler2DArray samp9;\n"
"#define samp0 samp9\n"
"#define uv0 f_uv0\n"
"uniform float3 filter_coefficients;\n"
"uniform float gamma_rcp;\n"
"uniform float2 clamp_tb;\n"
"uniform float pixel_height;\n");
out.Write("float4 SampleEFB(float3 uv, float y_offset) {\n"
" return texture(samp9, float3(uv.x, clamp(uv.y - (y_offset * pixel_height), "
"clamp_tb.x, clamp_tb.y), %s));\n"
"}\n",
mono_depth ? "0.0" : "uv.z");
out.Write("#define uv0 f_uv0\n"
"in vec3 uv0;\n"
"out vec4 ocol0;\n"
"void main(){\n"
" vec4 texcol = texture(samp0, %s);\n",
mono_depth ? "vec3(uv0.xy, 0.0)" : "uv0");
"void main(){\n");
}
else if (api_type == APIType::Vulkan)
{
out.Write("SAMPLER_BINDING(0) uniform sampler2DArray samp0;\n"
"layout(location = 0) in vec3 uv0;\n"
out.Write("UBO_BINDING(std140, 1) uniform PSBlock {\n"
" float3 filter_coefficients;\n"
" float gamma_rcp;\n"
" float2 clamp_tb;\n"
" float pixel_height;\n"
"};\n");
out.Write("SAMPLER_BINDING(0) uniform sampler2DArray samp0;\n");
out.Write("float4 SampleEFB(float3 uv, float y_offset) {\n"
" return texture(samp0, float3(uv.x, clamp(uv.y + (y_offset * pixel_height), "
"clamp_tb.x, clamp_tb.y), %s));\n"
"}\n",
mono_depth ? "0.0" : "uv.z");
out.Write("layout(location = 0) in vec3 uv0;\n"
"layout(location = 1) in vec4 col0;\n"
"layout(location = 0) out vec4 ocol0;"
"void main(){\n"
" vec4 texcol = texture(samp0, %s);\n",
mono_depth ? "vec3(uv0.xy, 0.0)" : "uv0");
"void main(){\n");
}
else if (api_type == APIType::D3D)
{
out.Write("Texture2DArray tex0 : register(t0);\n"
"SamplerState samp0 : register(s0);\n"
"void main(out float4 ocol0 : SV_Target,\n"
"uniform float3 filter_coefficients;\n"
"uniform float gamma_rcp;\n"
"uniform float2 clamp_tb;\n"
"uniform float pixel_height;\n\n");
out.Write("float4 SampleEFB(float3 uv, float y_offset) {\n"
" return tex0.Sample(samp0, float3(uv.x, clamp(uv.y + (y_offset * pixel_height), "
"clamp_tb.x, clamp_tb.y), %s));\n"
"}\n",
mono_depth ? "0.0" : "uv.z");
out.Write("void main(out float4 ocol0 : SV_Target,\n"
" in float4 pos : SV_Position,\n"
" in float3 uv0 : TEXCOORD0) {\n"
" float4 texcol = tex0.Sample(samp0, uv0);\n");
" in float3 uv0 : TEXCOORD0) {\n");
}

// The copy filter applies to both color and depth copies. This has been verified on hardware.
// The filter is only applied to the RGB channels, the alpha channel is left intact.
out.Write(" float4 prev_row = SampleEFB(uv0, -1.0f);\n"
" float4 current_row = SampleEFB(uv0, 0.0f);\n"
" float4 next_row = SampleEFB(uv0, 1.0f);\n"
" float4 texcol = float4(prev_row.rgb * filter_coefficients[0] +\n"
" current_row.rgb * filter_coefficients[1] +\n"
" next_row.rgb * filter_coefficients[2], current_row.a);\n");

if (uid_data->is_depth_copy)
{
if (api_type == APIType::D3D || api_type == APIType::Vulkan)
@@ -223,8 +255,8 @@ ShaderCode GenerateShader(APIType api_type, const UidData* uid_data)
out.Write(" ocol0 = texcol;\n");
break;

case EFBCopyFormat::XFB: // XFB copy, we just pretend it's an RGBX copy
out.Write(" ocol0 = float4(texcol.rgb, 1.0);\n");
case EFBCopyFormat::XFB:
out.Write(" ocol0 = float4(pow(texcol.rgb, gamma_rcp.xxx), texcol.a);\n");
break;

default:
@@ -120,6 +120,7 @@ void VideoConfig::Refresh()
iMaxAnisotropy = Config::Get(Config::GFX_ENHANCE_MAX_ANISOTROPY);
sPostProcessingShader = Config::Get(Config::GFX_ENHANCE_POST_SHADER);
bForceTrueColor = Config::Get(Config::GFX_ENHANCE_FORCE_TRUE_COLOR);
bDisableCopyFilter = Config::Get(Config::GFX_ENHANCE_DISABLE_COPY_FILTER);

stereo_mode = static_cast<StereoMode>(Config::Get(Config::GFX_STEREO_MODE));
iStereoDepth = Config::Get(Config::GFX_STEREO_DEPTH);
@@ -73,6 +73,7 @@ struct VideoConfig final
int iMaxAnisotropy;
std::string sPostProcessingShader;
bool bForceTrueColor;
bool bDisableCopyFilter;

// Information
bool bShowFPS;