Skip to content

Commit

Permalink
VideoCommon: Update EFB peek cache on draw done and tokens
Browse files Browse the repository at this point in the history
Massively improves performance in Mario Galaxy on Android.
  • Loading branch information
K0bin committed Sep 29, 2022
1 parent dafe2c7 commit 147b334
Show file tree
Hide file tree
Showing 5 changed files with 102 additions and 11 deletions.
3 changes: 3 additions & 0 deletions Source/Core/VideoCommon/BPStructs.cpp
Expand Up @@ -181,6 +181,7 @@ static void BPWritten(const BPCmd& bp, int cycles_into_future)
INCSTAT(g_stats.this_frame.num_draw_done);
g_texture_cache->FlushEFBCopies();
g_framebuffer_manager->InvalidatePeekCache(false);
g_framebuffer_manager->RefreshPeekCache();
if (!Fifo::UseDeterministicGPUThread())
PixelEngine::SetFinish(cycles_into_future); // may generate interrupt
DEBUG_LOG_FMT(VIDEO, "GXSetDrawDone SetPEFinish (value: {:#04X})", bp.newvalue & 0xFFFF);
Expand All @@ -195,6 +196,7 @@ static void BPWritten(const BPCmd& bp, int cycles_into_future)
INCSTAT(g_stats.this_frame.num_token);
g_texture_cache->FlushEFBCopies();
g_framebuffer_manager->InvalidatePeekCache(false);
g_framebuffer_manager->RefreshPeekCache();
if (!Fifo::UseDeterministicGPUThread())
PixelEngine::SetToken(static_cast<u16>(bp.newvalue & 0xFFFF), false, cycles_into_future);
DEBUG_LOG_FMT(VIDEO, "SetPEToken {:#06X}", bp.newvalue & 0xFFFF);
Expand All @@ -203,6 +205,7 @@ static void BPWritten(const BPCmd& bp, int cycles_into_future)
INCSTAT(g_stats.this_frame.num_token_int);
g_texture_cache->FlushEFBCopies();
g_framebuffer_manager->InvalidatePeekCache(false);
g_framebuffer_manager->RefreshPeekCache();
if (!Fifo::UseDeterministicGPUThread())
PixelEngine::SetToken(static_cast<u16>(bp.newvalue & 0xFFFF), true, cycles_into_future);
DEBUG_LOG_FMT(VIDEO, "SetPEToken + INT {:#06X}", bp.newvalue & 0xFFFF);
Expand Down
2 changes: 2 additions & 0 deletions Source/Core/VideoCommon/Fifo.cpp
Expand Up @@ -26,6 +26,7 @@
#include "VideoCommon/CPMemory.h"
#include "VideoCommon/CommandProcessor.h"
#include "VideoCommon/DataReader.h"
#include "VideoCommon/FramebufferManager.h"
#include "VideoCommon/OpcodeDecoding.h"
#include "VideoCommon/VertexLoaderManager.h"
#include "VideoCommon/VertexManagerBase.h"
Expand Down Expand Up @@ -415,6 +416,7 @@ void RunGpuLoop()
// The fifo is empty and it's unlikely we will get any more work in the near future.
// Make sure VertexManager finishes drawing any primitives it has stored in its buffer.
g_vertex_manager->Flush();
g_framebuffer_manager->RefreshPeekCache();
}
},
100);
Expand Down
93 changes: 84 additions & 9 deletions Source/Core/VideoCommon/FramebufferManager.cpp
Expand Up @@ -389,7 +389,7 @@ bool FramebufferManager::IsEFBCacheTilePresent(bool depth, u32 x, u32 y, u32* ti
{
*tile_index =
((y / m_efb_cache_tile_size) * m_efb_cache_tiles_wide) + (x / m_efb_cache_tile_size);
return data.valid && data.tiles[*tile_index];
return data.valid && data.tiles[*tile_index].present;
}
}

Expand Down Expand Up @@ -417,6 +417,14 @@ u32 FramebufferManager::PeekEFBColor(u32 x, u32 y)
if (!IsEFBCacheTilePresent(false, x, y, &tile_index))
PopulateEFBCache(false, tile_index);

m_efb_color_cache.tiles[tile_index].frame_access_mask |= 1;

if (m_efb_color_cache.needs_flush)
{
m_efb_color_cache.readback_texture->Flush();
m_efb_color_cache.needs_flush = false;
}

u32 value;
m_efb_color_cache.readback_texture->ReadTexel(x, y, &value);
return value;
Expand All @@ -432,6 +440,14 @@ float FramebufferManager::PeekEFBDepth(u32 x, u32 y)
if (!IsEFBCacheTilePresent(true, x, y, &tile_index))
PopulateEFBCache(true, tile_index);

m_efb_depth_cache.tiles[tile_index].frame_access_mask |= 1;

if (m_efb_depth_cache.needs_flush)
{
m_efb_depth_cache.readback_texture->Flush();
m_efb_depth_cache.needs_flush = false;
}

float value;
m_efb_depth_cache.readback_texture->ReadTexel(x, y, &value);
return value;
Expand All @@ -449,23 +465,64 @@ void FramebufferManager::SetEFBCacheTileSize(u32 size)
PanicAlertFmt("Failed to create EFB readback framebuffers");
}

void FramebufferManager::RefreshPeekCache()
{
if (m_efb_color_cache.valid && m_efb_depth_cache.valid)
{
return;
}

bool flush_command_buffer = false;
for (u32 i = 0; i < m_efb_color_cache.tiles.size(); i++)
{
if (m_efb_color_cache.tiles[i].frame_access_mask != 0 &&
(!m_efb_color_cache.valid || !m_efb_color_cache.tiles[i].present))
{
PopulateEFBCache(false, i, true);
flush_command_buffer = true;
}
if (m_efb_depth_cache.tiles[i].frame_access_mask != 0 &&
(!m_efb_depth_cache.valid || !m_efb_depth_cache.tiles[i].present))
{
PopulateEFBCache(true, i, true);
flush_command_buffer = true;
}
}
if (flush_command_buffer)
{
g_renderer->Flush();
}
}

void FramebufferManager::InvalidatePeekCache(bool forced)
{
if (forced || m_efb_color_cache.out_of_date)
{
if (m_efb_color_cache.valid)
std::fill(m_efb_color_cache.tiles.begin(), m_efb_color_cache.tiles.end(), false);
{
for (u32 i = 0; i < m_efb_color_cache.tiles.size(); i++)
{
m_efb_color_cache.tiles[i].present = false;
}
}

m_efb_color_cache.valid = false;
m_efb_color_cache.out_of_date = false;
m_efb_color_cache.needs_flush = true;
}
if (forced || m_efb_depth_cache.out_of_date)
{
if (m_efb_depth_cache.valid)
std::fill(m_efb_depth_cache.tiles.begin(), m_efb_depth_cache.tiles.end(), false);
{
for (u32 i = 0; i < m_efb_depth_cache.tiles.size(); i++)
{
m_efb_depth_cache.tiles[i].present = false;
}
}

m_efb_depth_cache.valid = false;
m_efb_depth_cache.out_of_date = false;
m_efb_depth_cache.needs_flush = true;
}
}

Expand All @@ -480,6 +537,15 @@ void FramebufferManager::FlagPeekCacheAsOutOfDate()
InvalidatePeekCache();
}

void FramebufferManager::EndOfFrame()
{
for (u32 i = 0; i < m_efb_color_cache.tiles.size(); i++)
{
m_efb_color_cache.tiles[i].frame_access_mask <<= 1;
m_efb_depth_cache.tiles[i].frame_access_mask <<= 1;
}
}

bool FramebufferManager::CompileReadbackPipelines()
{
AbstractPipelineConfig config = {};
Expand Down Expand Up @@ -612,9 +678,11 @@ bool FramebufferManager::CreateReadbackFramebuffer()
const u32 tiles_high = ((EFB_HEIGHT + (m_efb_cache_tile_size - 1)) / m_efb_cache_tile_size);
const u32 total_tiles = tiles_wide * tiles_high;
m_efb_color_cache.tiles.resize(total_tiles);
std::fill(m_efb_color_cache.tiles.begin(), m_efb_color_cache.tiles.end(), false);
std::fill(m_efb_color_cache.tiles.begin(), m_efb_color_cache.tiles.end(),
EFBCacheTile{false, 0});
m_efb_depth_cache.tiles.resize(total_tiles);
std::fill(m_efb_depth_cache.tiles.begin(), m_efb_depth_cache.tiles.end(), false);
std::fill(m_efb_depth_cache.tiles.begin(), m_efb_depth_cache.tiles.end(),
EFBCacheTile{false, 0});
m_efb_cache_tiles_wide = tiles_wide;
}

Expand All @@ -633,7 +701,7 @@ void FramebufferManager::DestroyReadbackFramebuffer()
DestroyCache(m_efb_depth_cache);
}

void FramebufferManager::PopulateEFBCache(bool depth, u32 tile_index)
void FramebufferManager::PopulateEFBCache(bool depth, u32 tile_index, bool async)
{
FlushEFBPokes();
g_vertex_manager->OnCPUEFBAccess();
Expand Down Expand Up @@ -693,11 +761,18 @@ void FramebufferManager::PopulateEFBCache(bool depth, u32 tile_index)
}

// Wait until the copy is complete.
data.readback_texture->Flush();
if (!async)
{
data.readback_texture->Flush();
data.needs_flush = false;
}
else
{
data.needs_flush = true;
}
data.valid = true;
data.out_of_date = false;
if (IsUsingTiledEFBCache())
data.tiles[tile_index] = true;
data.tiles[tile_index].present = true;
}

void FramebufferManager::ClearEFB(const MathUtil::Rectangle<int>& rc, bool clear_color,
Expand Down
13 changes: 11 additions & 2 deletions Source/Core/VideoCommon/FramebufferManager.h
Expand Up @@ -99,7 +99,9 @@ class FramebufferManager final
float PeekEFBDepth(u32 x, u32 y);
void SetEFBCacheTileSize(u32 size);
void InvalidatePeekCache(bool forced = true);
void RefreshPeekCache();
void FlagPeekCacheAsOutOfDate();
void EndOfFrame();

// Writes a value to the framebuffer. This will never block, and writes will be batched.
void PokeEFBColor(u32 x, u32 y, u32 color);
Expand All @@ -117,6 +119,12 @@ class FramebufferManager final
};
static_assert(std::is_standard_layout<EFBPokeVertex>::value, "EFBPokeVertex is standard-layout");

// Per-tile state of the EFB peek cache (one entry per tile in
// EFBCacheData::tiles). Initialized positionally as EFBCacheTile{false, 0},
// so the member order below must not change.
struct EFBCacheTile
{
// True once PopulateEFBCache has read this tile back; cleared again by
// InvalidatePeekCache. Checked by IsEFBCacheTilePresent.
bool present;
// Access history, one bit per frame: bit 0 is set when the tile is peeked
// (PeekEFBColor / PeekEFBDepth) and the mask is shifted left by one in
// EndOfFrame. A non-zero mask makes RefreshPeekCache re-populate the tile.
u8 frame_access_mask;
};

// EFB cache - for CPU EFB access
// Tiles are ordered left-to-right, then top-to-bottom
struct EFBCacheData
Expand All @@ -125,9 +133,10 @@ class FramebufferManager final
std::unique_ptr<AbstractFramebuffer> framebuffer;
std::unique_ptr<AbstractStagingTexture> readback_texture;
std::unique_ptr<AbstractPipeline> copy_pipeline;
std::vector<bool> tiles;
std::vector<EFBCacheTile> tiles;
bool out_of_date;
bool valid;
bool needs_flush;
};

bool CreateEFBFramebuffer();
Expand All @@ -151,7 +160,7 @@ class FramebufferManager final
bool IsUsingTiledEFBCache() const;
bool IsEFBCacheTilePresent(bool depth, u32 x, u32 y, u32* tile_index) const;
MathUtil::Rectangle<int> GetEFBCacheTileRect(u32 tile_index) const;
void PopulateEFBCache(bool depth, u32 tile_index);
void PopulateEFBCache(bool depth, u32 tile_index, bool async = false);

void CreatePokeVertices(std::vector<EFBPokeVertex>* destination_list, u32 x, u32 y, float z,
u32 color);
Expand Down
2 changes: 2 additions & 0 deletions Source/Core/VideoCommon/RenderBase.cpp
Expand Up @@ -1346,6 +1346,8 @@ void Renderer::Swap(u32 xfb_addr, u32 fb_width, u32 fb_stride, u32 fb_height, u6
m_graphics_mod_manager.EndOfFrame();
}

g_framebuffer_manager->EndOfFrame();

if (xfb_addr && fb_width && fb_stride && fb_height)
{
// Get the current XFB from texture cache
Expand Down

0 comments on commit 147b334

Please sign in to comment.