@@ -57,6 +57,8 @@ class SWStagingTexture final : public AbstractStagingTexture
void Unmap() override;
void Flush() override;

// Stores a new value in m_map_stride. The software texture encoder calls this to replace the
// staging texture's stride with the stride of the copy it is about to write.
void SetMapStride(size_t stride)
{
  m_map_stride = stride;
}

private:
std::vector<u8> m_data;
};
@@ -16,8 +16,8 @@ class TextureCache : public TextureCacheBase
TLUTFormat format) override
{
}
void CopyEFB(u8* dst, const EFBCopyParams& params, u32 native_width, u32 bytes_per_row,
u32 num_blocks_y, u32 memory_stride, const EFBRectangle& src_rect,
void CopyEFB(AbstractStagingTexture* dst, const EFBCopyParams& params, u32 native_width,
u32 bytes_per_row, u32 num_blocks_y, u32 memory_stride, const EFBRectangle& src_rect,
bool scale_by_half, float y_scale, float gamma, bool clamp_top, bool clamp_bottom,
const CopyFilterCoefficientArray& filter_coefficients) override
{
@@ -5,12 +5,14 @@
#include "VideoBackends/Software/TextureEncoder.h"

#include "Common/Align.h"
#include "Common/Assert.h"
#include "Common/CommonFuncs.h"
#include "Common/CommonTypes.h"
#include "Common/MsgHandler.h"
#include "Common/Swap.h"

#include "VideoBackends/Software/EfbInterface.h"
#include "VideoBackends/Software/SWTexture.h"

#include "VideoCommon/BPMemory.h"
#include "VideoCommon/LookUpTables.h"
@@ -1468,18 +1470,26 @@ void EncodeEfbCopy(u8* dst, const EFBCopyParams& params, u32 native_width, u32 b
}
}

// Encodes the EFB region src_rect into dst. The encoder is chosen from params.copy_format:
// XFB copies are handled by EfbInterface::EncodeXFB, every other format by EncodeEfbCopy.
// NOTE(review): this span lists both the u8* form and the AbstractStagingTexture* form of the
// signature and of each encoder call. The stride override below dereferences dst as a staging
// texture, so it only applies to the AbstractStagingTexture* form.
void Encode(u8* dst, const EFBCopyParams& params, u32 native_width, u32 bytes_per_row,
u32 num_blocks_y, u32 memory_stride, const EFBRectangle& src_rect, bool scale_by_half,
float y_scale, float gamma)
void Encode(AbstractStagingTexture* dst, const EFBCopyParams& params, u32 native_width,
u32 bytes_per_row, u32 num_blocks_y, u32 memory_stride, const EFBRectangle& src_rect,
bool scale_by_half, float y_scale, float gamma)
{
// HACK: Override the memory stride for this staging texture with new copy stride.
// This is required because the texture encoder assumes that we're writing directly to memory,
// and each row is tightly packed with no padding, whereas our encoding abstract texture has
// a width of 2560. When we copy the texture back later on, it'll use the tightly packed stride.
// The requested stride must fit within one row of the staging texture (width * texel size).
ASSERT(memory_stride <= (dst->GetConfig().width * dst->GetTexelSize()));
// Unchecked downcast: assumes dst is always an SW::SWStagingTexture here - TODO confirm callers.
static_cast<SW::SWStagingTexture*>(dst)->SetMapStride(memory_stride);

if (params.copy_format == EFBCopyFormat::XFB)
{
// XFB path: the encoder writes through the staging texture's mapped pointer.
EfbInterface::EncodeXFB(dst, native_width, src_rect, y_scale, gamma);
EfbInterface::EncodeXFB(reinterpret_cast<u8*>(dst->GetMappedPointer()), native_width, src_rect,
y_scale, gamma);
}
else
{
// Regular EFB copy path: same destination, but encoded according to params.
EncodeEfbCopy(dst, params, native_width, bytes_per_row, num_blocks_y, memory_stride, src_rect,
scale_by_half);
EncodeEfbCopy(reinterpret_cast<u8*>(dst->GetMappedPointer()), params, native_width,
bytes_per_row, num_blocks_y, memory_stride, src_rect, scale_by_half);
}
}
}
@@ -10,7 +10,7 @@

namespace TextureEncoder
{
// Encodes the EFB region src_rect into dst in the format described by params.
// NOTE(review): two prototypes are listed, one taking a raw u8* destination and one taking an
// AbstractStagingTexture*. Confirm which of them is meant to remain declared.
void Encode(u8* dst, const EFBCopyParams& params, u32 native_width, u32 bytes_per_row,
u32 num_blocks_y, u32 memory_stride, const EFBRectangle& src_rect, bool scale_by_half,
float y_scale, float gamma);
void Encode(AbstractStagingTexture* dst, const EFBCopyParams& params, u32 native_width,
u32 bytes_per_row, u32 num_blocks_y, u32 memory_stride, const EFBRectangle& src_rect,
bool scale_by_half, float y_scale, float gamma);
}
@@ -611,7 +611,7 @@ bool FramebufferManager::PopulateColorReadbackTexture()
{
// Can't be in our normal render pass.
StateTracker::GetInstance()->EndRenderPass();
StateTracker::GetInstance()->OnReadback();
StateTracker::GetInstance()->OnCPUEFBAccess();

// Issue a copy from framebuffer -> copy texture if we have >1xIR or MSAA on.
VkRect2D src_region = {{0, 0}, {GetEFBWidth(), GetEFBHeight()}};
@@ -684,7 +684,7 @@ bool FramebufferManager::PopulateDepthReadbackTexture()
{
// Can't be in our normal render pass.
StateTracker::GetInstance()->EndRenderPass();
StateTracker::GetInstance()->OnReadback();
StateTracker::GetInstance()->OnCPUEFBAccess();

// Issue a copy from framebuffer -> copy texture if we have >1xIR or MSAA on.
VkRect2D src_region = {{0, 0}, {GetEFBWidth(), GetEFBHeight()}};
@@ -513,7 +513,7 @@ void StateTracker::OnDraw()
}
}

void StateTracker::OnReadback()
void StateTracker::OnCPUEFBAccess()
{
// Check this isn't another access without any draws in between.
if (!m_cpu_accesses_this_frame.empty() && m_cpu_accesses_this_frame.back() == m_draw_counter)
@@ -523,9 +523,28 @@ void StateTracker::OnReadback()
m_cpu_accesses_this_frame.emplace_back(m_draw_counter);
}

void StateTracker::OnEFBCopyToRAM()
{
// If we're not deferring, try to preempt it next frame.
if (!g_ActiveConfig.bDeferEFBCopies)
{
OnCPUEFBAccess();
return;
}

// Otherwise, only execute if we have at least 10 objects between us and the last copy.
const u32 diff = m_draw_counter - m_last_efb_copy_draw_counter;
m_last_efb_copy_draw_counter = m_draw_counter;
if (diff < MINIMUM_DRAW_CALLS_PER_COMMAND_BUFFER_FOR_READBACK)
return;

Util::ExecuteCurrentCommandsAndRestoreState(true);
}

void StateTracker::OnEndFrame()
{
m_draw_counter = 0;
m_last_efb_copy_draw_counter = 0;
m_scheduled_command_buffer_kicks.clear();

// If we have no CPU access at all, leave everything in the one command buffer for maximum
@@ -86,8 +86,10 @@ class StateTracker
void OnDraw();

// Call after CPU access is requested.
// This can be via EFBCache or EFB2RAM.
void OnReadback();
void OnCPUEFBAccess();

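// Call after an EFB copy to RAM. When copies are not deferred this is tracked as a CPU EFB
// access; otherwise the current command buffer is executed once enough draw calls have been
// issued since the previous copy.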
void OnEFBCopyToRAM();

// Call at the end of a frame.
void OnEndFrame();
@@ -182,6 +184,7 @@ class StateTracker

// CPU access tracking
u32 m_draw_counter = 0;
u32 m_last_efb_copy_draw_counter = 0;
std::vector<u32> m_cpu_accesses_this_frame;
std::vector<u32> m_scheduled_command_buffer_kicks;
bool m_allow_background_execution = true;
@@ -98,8 +98,8 @@ void TextureCache::ConvertTexture(TCacheEntry* destination, TCacheEntry* source,
VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL);
}

void TextureCache::CopyEFB(u8* dst, const EFBCopyParams& params, u32 native_width,
u32 bytes_per_row, u32 num_blocks_y, u32 memory_stride,
void TextureCache::CopyEFB(AbstractStagingTexture* dst, const EFBCopyParams& params,
u32 native_width, u32 bytes_per_row, u32 num_blocks_y, u32 memory_stride,
const EFBRectangle& src_rect, bool scale_by_half, float y_scale,
float gamma, bool clamp_top, bool clamp_bottom,
const CopyFilterCoefficientArray& filter_coefficients)
@@ -126,7 +126,6 @@ void TextureCache::CopyEFB(u8* dst, const EFBCopyParams& params, u32 native_widt
// The barrier has to happen after the render pass, not inside it, as we are going to be
// reading from the texture immediately afterwards.
StateTracker::GetInstance()->EndRenderPass();
StateTracker::GetInstance()->OnReadback();

// Transition to shader resource before reading.
VkImageLayout original_layout = src_texture->GetLayout();
@@ -139,6 +138,8 @@ void TextureCache::CopyEFB(u8* dst, const EFBCopyParams& params, u32 native_widt

// Transition back to original state
src_texture->TransitionToLayout(g_command_buffer_mgr->GetCurrentCommandBuffer(), original_layout);

StateTracker::GetInstance()->OnEFBCopyToRAM();
}

bool TextureCache::SupportsGPUTextureDecode(TextureFormat format, TLUTFormat palette_format)
@@ -36,8 +36,8 @@ class TextureCache : public TextureCacheBase
void ConvertTexture(TCacheEntry* destination, TCacheEntry* source, const void* palette,
TLUTFormat format) override;

void CopyEFB(u8* dst, const EFBCopyParams& params, u32 native_width, u32 bytes_per_row,
u32 num_blocks_y, u32 memory_stride, const EFBRectangle& src_rect,
void CopyEFB(AbstractStagingTexture* dst, const EFBCopyParams& params, u32 native_width,
u32 bytes_per_row, u32 num_blocks_y, u32 memory_stride, const EFBRectangle& src_rect,
bool scale_by_half, float y_scale, float gamma, bool clamp_top, bool clamp_bottom,
const CopyFilterCoefficientArray& filter_coefficients) override;

@@ -207,10 +207,10 @@ void TextureConverter::ConvertTexture(TextureCacheBase::TCacheEntry* dst_entry,
}

void TextureConverter::EncodeTextureToMemory(
VkImageView src_texture, u8* dest_ptr, const EFBCopyParams& params, u32 native_width,
u32 bytes_per_row, u32 num_blocks_y, u32 memory_stride, const EFBRectangle& src_rect,
bool scale_by_half, float y_scale, float gamma, bool clamp_top, bool clamp_bottom,
const TextureCacheBase::CopyFilterCoefficientArray& filter_coefficients)
VkImageView src_texture, AbstractStagingTexture* dest, const EFBCopyParams& params,
u32 native_width, u32 bytes_per_row, u32 num_blocks_y, u32 memory_stride,
const EFBRectangle& src_rect, bool scale_by_half, float y_scale, float gamma, bool clamp_top,
bool clamp_bottom, const TextureCacheBase::CopyFilterCoefficientArray& filter_coefficients)
{
VkShaderModule shader = GetEncodingShader(params);
if (shader == VK_NULL_HANDLE)
@@ -273,9 +273,7 @@ void TextureConverter::EncodeTextureToMemory(
draw.EndRenderPass();

MathUtil::Rectangle<int> copy_rect(0, 0, render_width, render_height);
m_encoding_readback_texture->CopyFromTexture(m_encoding_render_texture.get(), copy_rect, 0, 0,
copy_rect);
m_encoding_readback_texture->ReadTexels(copy_rect, dest_ptr, memory_stride);
dest->CopyFromTexture(m_encoding_render_texture.get(), copy_rect, 0, 0, copy_rect);
}

bool TextureConverter::SupportsTextureDecoding(TextureFormat format, TLUTFormat palette_format)
@@ -610,14 +608,8 @@ VkShaderModule TextureConverter::GetEncodingShader(const EFBCopyParams& params)

// Creates the texture(s) used as the target of the EFB encoding shader.
// Returns true when every texture requested here was created successfully.
// NOTE(review): two bodies are interleaved below. One builds a local TextureConfig and also
// creates a readback staging texture; the other creates only the render texture from
// TextureCache::GetEncodingTextureConfig(). As written, everything after the first return
// statement is unreachable.
bool TextureConverter::CreateEncodingTexture()
{
TextureConfig config(ENCODING_TEXTURE_WIDTH, ENCODING_TEXTURE_HEIGHT, 1, 1, 1,
ENCODING_TEXTURE_FORMAT, true);

m_encoding_render_texture = g_renderer->CreateTexture(config);
m_encoding_readback_texture =
g_renderer->CreateStagingTexture(StagingTextureType::Readback, config);

return m_encoding_render_texture && m_encoding_readback_texture;
// Variant that relies on the shared encoding texture config and keeps no readback texture.
m_encoding_render_texture = g_renderer->CreateTexture(TextureCache::GetEncodingTextureConfig());
return m_encoding_render_texture != nullptr;
}

bool TextureConverter::CreateDecodingTexture()
@@ -21,7 +21,6 @@ class AbstractStagingTexture;

namespace Vulkan
{
class StagingTexture2D;
class Texture2D;
class VKTexture;

@@ -38,14 +37,12 @@ class TextureConverter
TextureCache::TCacheEntry* src_entry, const void* palette,
TLUTFormat palette_format);

// Uses an encoding shader to copy src_texture to dest_ptr.
// NOTE: Executes the current command buffer.
void
EncodeTextureToMemory(VkImageView src_texture, u8* dest_ptr, const EFBCopyParams& params,
u32 native_width, u32 bytes_per_row, u32 num_blocks_y, u32 memory_stride,
const EFBRectangle& src_rect, bool scale_by_half, float y_scale,
float gamma, bool clamp_top, bool clamp_bottom,
const TextureCacheBase::CopyFilterCoefficientArray& filter_coefficients);
// Uses an encoding shader to copy src_texture to dest.
void EncodeTextureToMemory(
VkImageView src_texture, AbstractStagingTexture* dest, const EFBCopyParams& params,
u32 native_width, u32 bytes_per_row, u32 num_blocks_y, u32 memory_stride,
const EFBRectangle& src_rect, bool scale_by_half, float y_scale, float gamma, bool clamp_top,
bool clamp_bottom, const TextureCacheBase::CopyFilterCoefficientArray& filter_coefficients);

bool SupportsTextureDecoding(TextureFormat format, TLUTFormat palette_format);
void DecodeTexture(VkCommandBuffer command_buffer, TextureCache::TCacheEntry* entry,
@@ -54,9 +51,6 @@ class TextureConverter
const u8* palette, TLUTFormat palette_format);

private:
static const u32 ENCODING_TEXTURE_WIDTH = EFB_WIDTH * 4;
static const u32 ENCODING_TEXTURE_HEIGHT = 1024;
static const AbstractTextureFormat ENCODING_TEXTURE_FORMAT = AbstractTextureFormat::BGRA8;
static const size_t NUM_PALETTE_CONVERSION_SHADERS = 3;

// Maximum size of a texture based on BP registers.
@@ -100,7 +94,6 @@ class TextureConverter
// Texture encoding - RGBA8->GX format in memory
std::map<EFBCopyParams, VkShaderModule> m_encoding_shaders;
std::unique_ptr<AbstractTexture> m_encoding_render_texture;
std::unique_ptr<AbstractStagingTexture> m_encoding_readback_texture;

// Texture decoding - GX format in memory->RGBA8
struct TextureDecodingPipeline
@@ -466,6 +466,7 @@ void VKStagingTexture::CopyFromTexture(Texture2D* src, const MathUtil::Rectangle
m_needs_flush = false;
g_command_buffer_mgr->RemoveFencePointCallback(
this);
m_staging_buffer->InvalidateCPUCache();
});
}

@@ -177,6 +177,7 @@ static void BPWritten(const BPCmd& bp)
switch (bp.newvalue & 0xFF)
{
case 0x02:
g_texture_cache->FlushEFBCopies();
if (!Fifo::UseDeterministicGPUThread())
PixelEngine::SetFinish(); // may generate interrupt
DEBUG_LOG(VIDEO, "GXSetDrawDone SetPEFinish (value: 0x%02X)", (bp.newvalue & 0xFFFF));
@@ -188,11 +189,13 @@ static void BPWritten(const BPCmd& bp)
}
return;
case BPMEM_PE_TOKEN_ID: // Pixel Engine Token ID
g_texture_cache->FlushEFBCopies();
if (!Fifo::UseDeterministicGPUThread())
PixelEngine::SetToken(static_cast<u16>(bp.newvalue & 0xFFFF), false);
DEBUG_LOG(VIDEO, "SetPEToken 0x%04x", (bp.newvalue & 0xFFFF));
return;
case BPMEM_PE_TOKEN_INT_ID: // Pixel Engine Interrupt Token ID
g_texture_cache->FlushEFBCopies();
if (!Fifo::UseDeterministicGPUThread())
PixelEngine::SetToken(static_cast<u16>(bp.newvalue & 0xFFFF), true);
DEBUG_LOG(VIDEO, "SetPEToken + INT 0x%04x", (bp.newvalue & 0xFFFF));
@@ -724,6 +724,10 @@ void Renderer::Swap(u32 xfbAddr, u32 fbWidth, u32 fbStride, u32 fbHeight, const
// state changes the specialized shader will not take over.
g_vertex_manager->InvalidatePipelineObject();

// Flush any outstanding EFB copies to RAM, in case the game is running at an uncapped frame
// rate and not waiting for vblank. Otherwise, we'd end up with a huge list of pending copies.
g_texture_cache->FlushEFBCopies();

Core::Callback_VideoCopiedToXFB(true);
}

@@ -28,6 +28,7 @@
#include "Core/FifoPlayer/FifoRecorder.h"
#include "Core/HW/Memmap.h"

#include "VideoCommon/AbstractStagingTexture.h"
#include "VideoCommon/BPMemory.h"
#include "VideoCommon/Debugger.h"
#include "VideoCommon/FramebufferManagerBase.h"
@@ -89,6 +90,7 @@ TextureCacheBase::TextureCacheBase()

void TextureCacheBase::Invalidate()
{
FlushEFBCopies();
InvalidateAllBindPoints();
for (size_t i = 0; i < bound_textures.size(); ++i)
{
@@ -1693,35 +1695,6 @@ void TextureCacheBase::CopyRenderTargetToTexture(
const u32 bytes_per_row = num_blocks_x * bytes_per_block;
const u32 covered_range = num_blocks_y * dstStride;

if (copy_to_ram)
{
CopyFilterCoefficientArray coefficients = GetRAMCopyFilterCoefficients(filter_coefficients);
PEControl::PixelFormat srcFormat = bpmem.zcontrol.pixel_format;
EFBCopyParams format(srcFormat, dstFormat, is_depth_copy, isIntensity,
NeedsCopyFilterInShader(coefficients));
CopyEFB(dst, format, tex_w, bytes_per_row, num_blocks_y, dstStride, srcRect, scaleByHalf,
y_scale, gamma, clamp_top, clamp_bottom, coefficients);
}
else
{
if (is_xfb_copy)
{
UninitializeXFBMemory(dst, dstStride, bytes_per_row, num_blocks_y);
}
else
{
// Hack: Most games don't actually need the correct texture data in RAM
// and we can just keep a copy in VRAM. We zero the memory so we
// can check it hasn't changed before using our copy in VRAM.
u8* ptr = dst;
for (u32 i = 0; i < num_blocks_y; i++)
{
memset(ptr, 0, bytes_per_row);
ptr += dstStride;
}
}
}

if (g_bRecordFifoData)
{
// Mark the memory behind this EFB copy as dynamically generated for the FIFO log
@@ -1775,7 +1748,9 @@ void TextureCacheBase::CopyRenderTargetToTexture(
(!strided_efb_copy && entry->size_in_bytes == overlap_range) ||
(strided_efb_copy && entry->size_in_bytes == overlap_range && entry->addr == dstAddr))
{
iter.first = InvalidateTexture(iter.first);
// Pending EFB copies which are completely covered by this new copy can simply be tossed,
// instead of having to flush them later on, since this copy will write over everything.
iter.first = InvalidateTexture(iter.first, true);
continue;
}
entry->may_have_overlapping_textures = true;
@@ -1804,6 +1779,7 @@ void TextureCacheBase::CopyRenderTargetToTexture(
++iter.first;
}

TCacheEntry* entry = nullptr;
if (copy_to_vram)
{
// create the texture
@@ -1813,8 +1789,7 @@ void TextureCacheBase::CopyRenderTargetToTexture(
config.height = scaled_tex_h;
config.layers = FramebufferManagerBase::GetEFBLayers();

TCacheEntry* entry = AllocateCacheEntry(config);

entry = AllocateCacheEntry(config);
if (entry)
{
entry->SetGeneralParameters(dstAddr, 0, baseFormat, is_xfb_copy);
@@ -1836,9 +1811,6 @@ void TextureCacheBase::CopyRenderTargetToTexture(
clamp_top, clamp_bottom,
GetVRAMCopyFilterCoefficients(filter_coefficients));

u64 hash = entry->CalculateHash();
entry->SetHashes(hash, hash);

if (g_ActiveConfig.bDumpEFBTarget && !is_xfb_copy)
{
static int efb_count = 0;
@@ -1860,6 +1832,134 @@ void TextureCacheBase::CopyRenderTargetToTexture(
textures_by_address.emplace(dstAddr, entry);
}
}

if (copy_to_ram)
{
CopyFilterCoefficientArray coefficients = GetRAMCopyFilterCoefficients(filter_coefficients);
PEControl::PixelFormat srcFormat = bpmem.zcontrol.pixel_format;
EFBCopyParams format(srcFormat, dstFormat, is_depth_copy, isIntensity,
NeedsCopyFilterInShader(coefficients));

std::unique_ptr<AbstractStagingTexture> staging_texture = GetEFBCopyStagingTexture();
if (staging_texture)
{
CopyEFB(staging_texture.get(), format, tex_w, bytes_per_row, num_blocks_y, dstStride, srcRect,
scaleByHalf, y_scale, gamma, clamp_top, clamp_bottom, coefficients);

// We can't defer if there is no VRAM copy (since we need to update the hash).
if (!copy_to_vram || !g_ActiveConfig.bDeferEFBCopies)
{
// Immediately flush it.
WriteEFBCopyToRAM(dst, bytes_per_row / sizeof(u32), num_blocks_y, dstStride,
std::move(staging_texture));
}
else
{
// Defer the flush until later.
entry->pending_efb_copy = std::move(staging_texture);
entry->pending_efb_copy_width = bytes_per_row / sizeof(u32);
entry->pending_efb_copy_height = num_blocks_y;
entry->pending_efb_copy_invalidated = false;
m_pending_efb_copies.push_back(entry);
}
}
}
else
{
if (is_xfb_copy)
{
UninitializeXFBMemory(dst, dstStride, bytes_per_row, num_blocks_y);
}
else
{
// Hack: Most games don't actually need the correct texture data in RAM
// and we can just keep a copy in VRAM. We zero the memory so we
// can check it hasn't changed before using our copy in VRAM.
u8* ptr = dst;
for (u32 i = 0; i < num_blocks_y; i++)
{
std::memset(ptr, 0, bytes_per_row);
ptr += dstStride;
}
}
}

// Even if the copy is deferred, still compute the hash. This way if the copy is used as a texture
// in a subsequent draw before it is flushed, it will have the same hash.
if (entry)
{
const u64 hash = entry->CalculateHash();
entry->SetHashes(hash, hash);
}
}

void TextureCacheBase::FlushEFBCopies()
{
if (m_pending_efb_copies.empty())
return;

for (TCacheEntry* entry : m_pending_efb_copies)
FlushEFBCopy(entry);
m_pending_efb_copies.clear();
}

// Builds the texture config shared by the EFB encoding render target and the readback staging
// textures: four EFB widths across, 1024 rows, BGRA8.
TextureConfig TextureCacheBase::GetEncodingTextureConfig()
{
  const u32 encoding_width = EFB_WIDTH * 4;
  const u32 encoding_height = 1024;

  return TextureConfig(encoding_width, encoding_height, 1, 1, 1, AbstractTextureFormat::BGRA8,
                       true);
}

// Reads a width x height texel rectangle (anchored at the origin) out of staging_texture into
// dst_ptr using the given destination stride, then hands the staging texture back to the pool.
// Takes ownership of staging_texture.
void TextureCacheBase::WriteEFBCopyToRAM(u8* dst_ptr, u32 width, u32 height, u32 stride,
                                         std::unique_ptr<AbstractStagingTexture> staging_texture)
{
  const int rect_right = static_cast<int>(width);
  const int rect_bottom = static_cast<int>(height);
  MathUtil::Rectangle<int> texel_rect(0, 0, rect_right, rect_bottom);

  staging_texture->ReadTexels(texel_rect, dst_ptr, stride);

  // The texels are in guest memory now, so the staging texture can be re-used.
  ReleaseEFBCopyStagingTexture(std::move(staging_texture));
}

// Completes a single deferred EFB copy: writes the staged texels to the guest address stored in
// the entry, then either refreshes the entry's hash or, if the entry was invalidated while the
// copy was pending, returns its texture to the texture pool.
void TextureCacheBase::FlushEFBCopy(TCacheEntry* entry)
{
  // Staging texture -> guest memory. Ownership of the staging texture is handed off here, which
  // leaves entry->pending_efb_copy empty.
  u8* const guest_ptr = Memory::GetPointer(entry->addr);
  WriteEFBCopyToRAM(guest_ptr, entry->pending_efb_copy_width, entry->pending_efb_copy_height,
                    entry->memory_stride, std::move(entry->pending_efb_copy));

  if (!entry->pending_efb_copy_invalidated)
  {
    // Guest memory now holds the real data, so re-hash the entry against it.
    // This should be safe because any write is caught before the game can modify the memory.
    const u64 ram_hash = entry->CalculateHash();
    entry->SetHashes(ram_hash, ram_hash);
    return;
  }

  // The entry was invalidated while its copy was still pending (e.g. the bloom case described in
  // InvalidateTexture), so no new hash is needed; just recycle the texture.
  // The config must be read before the texture is moved out of the entry.
  // NOTE(review): the TCacheEntry object itself is not released here - confirm who owns it once
  // it has been removed from the cache.
  auto pooled_config = entry->texture->GetConfig();
  texture_pool.emplace(pooled_config, TexPoolEntry(std::move(entry->texture)));
}

// Hands out a readback staging texture for an EFB copy, preferring a pooled one and creating a
// new one only when the pool is empty. Returns null (after logging a warning) if creation fails.
std::unique_ptr<AbstractStagingTexture> TextureCacheBase::GetEFBCopyStagingTexture()
{
  if (m_efb_copy_staging_texture_pool.empty())
  {
    // Nothing to recycle, so ask the renderer for a fresh texture with the encoding config.
    std::unique_ptr<AbstractStagingTexture> created =
        g_renderer->CreateStagingTexture(StagingTextureType::Readback, GetEncodingTextureConfig());
    if (!created)
      WARN_LOG(VIDEO, "Failed to create EFB copy staging texture");

    return created;
  }

  // Take from the back of the pool so the most frequently used textures are re-used first.
  std::unique_ptr<AbstractStagingTexture> recycled =
      std::move(m_efb_copy_staging_texture_pool.back());
  m_efb_copy_staging_texture_pool.pop_back();
  return recycled;
}

// Returns an EFB copy staging texture to the back of the pool so a later copy can re-use it.
// Takes ownership of tex.
void TextureCacheBase::ReleaseEFBCopyStagingTexture(std::unique_ptr<AbstractStagingTexture> tex)
{
  m_efb_copy_staging_texture_pool.emplace_back(std::move(tex));
}

void TextureCacheBase::UninitializeXFBMemory(u8* dst, u32 stride, u32 bytes_per_row,
@@ -1989,7 +2089,7 @@ TextureCacheBase::FindOverlappingTextures(u32 addr, u32 size_in_bytes)
}

TextureCacheBase::TexAddrCache::iterator
TextureCacheBase::InvalidateTexture(TexAddrCache::iterator iter)
TextureCacheBase::InvalidateTexture(TexAddrCache::iterator iter, bool discard_pending_efb_copy)
{
if (iter == textures_by_address.end())
return textures_by_address.end();
@@ -2014,6 +2114,33 @@ TextureCacheBase::InvalidateTexture(TexAddrCache::iterator iter)
}
}

// If this is a pending EFB copy, we don't want to flush it here.
// Why? Because let's say a game is rendering a bloom-type effect, using EFB copies to essentially
// downscale the framebuffer. Copy from EFB->Texture, draw texture to EFB, copy EFB->Texture,
// draw, repeat. The second copy will invalidate the first, forcing a flush. Which means we lose
// any benefit of EFB copy batching. So instead, let's just leave the EFB copy pending, but remove
// it from the texture cache. This way we don't use the old VRAM copy. When the EFB copies are
// eventually flushed, they will overwrite each other, and the end result should be the same.
if (entry->pending_efb_copy)
{
if (discard_pending_efb_copy)
{
// If the RAM copy is being completely overwritten by a new EFB copy, we can discard the
// existing pending copy, and not bother waiting for it in the future. This happens in
// Xenoblade's sunset scene, where 35 copies are done per frame, and 25 of them are
// copied to the same address, and can be skipped.
ReleaseEFBCopyStagingTexture(std::move(entry->pending_efb_copy));
auto pending_it = std::find(m_pending_efb_copies.begin(), m_pending_efb_copies.end(), entry);
if (pending_it != m_pending_efb_copies.end())
m_pending_efb_copies.erase(pending_it);
}
else
{
entry->pending_efb_copy_invalidated = true;
return textures_by_address.erase(iter);
}
}

auto config = entry->texture->GetConfig();
texture_pool.emplace(config, TexPoolEntry(std::move(entry->texture)));

@@ -13,6 +13,7 @@
#include <tuple>
#include <unordered_map>
#include <unordered_set>
#include <vector>

#include "Common/CommonTypes.h"
#include "VideoCommon/AbstractTexture.h"
@@ -22,6 +23,7 @@
#include "VideoCommon/VideoCommon.h"

struct VideoConfig;
class AbstractStagingTexture;

struct TextureAndTLUTFormat
{
@@ -149,6 +151,12 @@ class TextureCacheBase
// * partially updated textures which refer to this efb copy
std::unordered_set<TCacheEntry*> references;

// Pending EFB copy
std::unique_ptr<AbstractStagingTexture> pending_efb_copy;
u32 pending_efb_copy_width = 0;
u32 pending_efb_copy_height = 0;
bool pending_efb_copy_invalidated = false;

explicit TCacheEntry(std::unique_ptr<AbstractTexture> tex);

~TCacheEntry();
@@ -216,10 +224,10 @@ class TextureCacheBase

void Invalidate();

virtual void CopyEFB(u8* dst, const EFBCopyParams& params, u32 native_width, u32 bytes_per_row,
u32 num_blocks_y, u32 memory_stride, const EFBRectangle& src_rect,
bool scale_by_half, float y_scale, float gamma, bool clamp_top,
bool clamp_bottom,
virtual void CopyEFB(AbstractStagingTexture* dst, const EFBCopyParams& params, u32 native_width,
u32 bytes_per_row, u32 num_blocks_y, u32 memory_stride,
const EFBRectangle& src_rect, bool scale_by_half, float y_scale, float gamma,
bool clamp_top, bool clamp_bottom,
const CopyFilterCoefficientArray& filter_coefficients) = 0;

virtual bool CompileShaders() = 0;
@@ -278,6 +286,12 @@ class TextureCacheBase

void ScaleTextureCacheEntryTo(TCacheEntry* entry, u32 new_width, u32 new_height);

// Flushes all pending EFB copies to emulated RAM.
void FlushEFBCopies();

// Returns a texture config suitable for drawing a RAM EFB copy into.
static TextureConfig GetEncodingTextureConfig();

protected:
TextureCacheBase();

@@ -329,7 +343,8 @@ class TextureCacheBase
const CopyFilterCoefficientArray& filter_coefficients) = 0;

// Removes and unlinks texture from texture cache and returns it to the pool
TexAddrCache::iterator InvalidateTexture(TexAddrCache::iterator t_iter);
TexAddrCache::iterator InvalidateTexture(TexAddrCache::iterator t_iter,
bool discard_pending_efb_copy = false);

void UninitializeXFBMemory(u8* dst, u32 stride, u32 bytes_per_row, u32 num_blocks_y);

@@ -339,6 +354,17 @@ class TextureCacheBase
CopyFilterCoefficientArray
GetVRAMCopyFilterCoefficients(const CopyFilterCoefficients::Values& coefficients) const;

// Writes an EFB copy held in a host staging texture out to guest RAM.
void WriteEFBCopyToRAM(u8* dst_ptr, u32 width, u32 height, u32 stride,
std::unique_ptr<AbstractStagingTexture> staging_texture);
void FlushEFBCopy(TCacheEntry* entry);

// Returns a staging texture of the maximum EFB copy size.
std::unique_ptr<AbstractStagingTexture> GetEFBCopyStagingTexture();

// Returns an EFB copy staging texture to the pool, so it can be re-used.
void ReleaseEFBCopyStagingTexture(std::unique_ptr<AbstractStagingTexture> tex);

TexAddrCache textures_by_address;
TexHashCache textures_by_hash;
TexPool texture_pool;
@@ -360,6 +386,13 @@ class TextureCacheBase
bool arbitrary_mipmap_detection;
};
BackupConfig backup_config = {};

// Pool of readback textures used for deferred EFB copies.
std::vector<std::unique_ptr<AbstractStagingTexture>> m_efb_copy_staging_texture_pool;

// List of pending EFB copies. It is important that the order is preserved for these,
// so that overlapping textures are written to guest RAM in the order they are issued.
std::vector<TCacheEntry*> m_pending_efb_copies;
};

extern std::unique_ptr<TextureCacheBase> g_texture_cache;
@@ -141,6 +141,7 @@ void VideoConfig::Refresh()
bSkipEFBCopyToRam = Config::Get(Config::GFX_HACK_SKIP_EFB_COPY_TO_RAM);
bSkipXFBCopyToRam = Config::Get(Config::GFX_HACK_SKIP_XFB_COPY_TO_RAM);
bDisableCopyToVRAM = Config::Get(Config::GFX_HACK_DISABLE_COPY_TO_VRAM);
bDeferEFBCopies = Config::Get(Config::GFX_HACK_DEFER_EFB_COPIES);
bImmediateXFB = Config::Get(Config::GFX_HACK_IMMEDIATE_XFB);
bCopyEFBScaled = Config::Get(Config::GFX_HACK_COPY_EFB_SCALED);
bEFBEmulateFormatChanges = Config::Get(Config::GFX_HACK_EFB_EMULATE_FORMAT_CHANGES);
@@ -120,6 +120,7 @@ struct VideoConfig final
bool bSkipEFBCopyToRam;
bool bSkipXFBCopyToRam;
bool bDisableCopyToVRAM;
bool bDeferEFBCopies;
bool bImmediateXFB;
bool bCopyEFBScaled;
int iSafeTextureCache_ColorSamples;