@@ -63,7 +63,7 @@ static AfterLoadCallbackFunc s_on_after_load_callback;
// Temporary undo state buffer
static std::vector<u8> g_undo_load_buffer;
static std::vector<u8> g_current_buffer;
static int g_loadDepth = 0;
static bool s_load_or_save_in_progress;

static std::mutex g_cs_undo_load_buffer;
static std::mutex g_cs_current_buffer;
@@ -72,7 +72,7 @@ static Common::Event g_compressAndDumpStateSyncEvent;
static std::thread g_save_thread;

// Don't forget to increase this after making changes to the savestate system
static const u32 STATE_VERSION = 110; // Last changed in PR 8036
static const u32 STATE_VERSION = 111; // Last changed in PR 6321

// Maps savestate versions to Dolphin versions.
// Versions after 42 don't need to be added to this list,
@@ -170,6 +170,11 @@ static void DoState(PointerWrap& p)
return;
}

// Movie must be done before the video backend, because the window is redrawn in the video backend
// state load, and the frame number must be up-to-date.
Movie::DoState(p);
p.DoMarker("Movie");

// Begin with video backend, so that it gets a chance to clear its caches and writeback modified
// things to RAM
g_video_backend->DoState(p);
@@ -186,8 +191,6 @@ static void DoState(PointerWrap& p)
if (SConfig::GetInstance().bWii)
Wiimote::DoState(p);
p.DoMarker("Wiimote");
Movie::DoState(p);
p.DoMarker("Movie");
Gecko::DoState(p);
p.DoMarker("Gecko");

@@ -204,27 +207,31 @@ void LoadFromBuffer(std::vector<u8>& buffer)
return;
}

Core::RunAsCPUThread([&] {
u8* ptr = &buffer[0];
PointerWrap p(&ptr, PointerWrap::MODE_READ);
DoState(p);
});
Core::RunOnCPUThread(
[&] {
u8* ptr = &buffer[0];
PointerWrap p(&ptr, PointerWrap::MODE_READ);
DoState(p);
},
true);
}

// Serializes the current state into |buffer| through DoState(): a MODE_MEASURE pass determines
// the required size, |buffer| is resized to it, and a MODE_WRITE pass then fills the buffer.
// NOTE(review): the same measure/write body is dispatched twice below, once through
// Core::RunAsCPUThread and once through Core::RunOnCPUThread(..., true). This looks like two
// revisions of the same call sitting side by side - confirm that only one is intended.
void SaveToBuffer(std::vector<u8>& buffer)
{
Core::RunAsCPUThread([&] {
// Measure pass: ptr starts at null and its final value is read back as the byte count.
u8* ptr = nullptr;
PointerWrap p(&ptr, PointerWrap::MODE_MEASURE);

DoState(p);
const size_t buffer_size = reinterpret_cast<size_t>(ptr);
buffer.resize(buffer_size);

// Write pass: point at the start of the resized buffer and run DoState again.
ptr = &buffer[0];
p.SetMode(PointerWrap::MODE_WRITE);
DoState(p);
});
Core::RunOnCPUThread(
[&] {
// Measure pass: ptr starts at null and its final value is read back as the byte count.
u8* ptr = nullptr;
PointerWrap p(&ptr, PointerWrap::MODE_MEASURE);

DoState(p);
const size_t buffer_size = reinterpret_cast<size_t>(ptr);
buffer.resize(buffer_size);

// Write pass: point at the start of the resized buffer and run DoState again.
ptr = &buffer[0];
p.SetMode(PointerWrap::MODE_WRITE);
DoState(p);
},
true);
}

// return state number not in map
@@ -381,42 +388,51 @@ static void CompressAndDumpState(CompressAndDumpState_args save_args)

// Saves the current state to |filename|: measures the state, writes it into g_current_buffer
// under g_cs_current_buffer, then starts g_save_thread running CompressAndDumpState and waits
// on g_compressAndDumpStateSyncEvent. |wait| is forwarded unchanged in the save arguments.
// NOTE(review): the measure/write/dump body appears twice below, once dispatched through
// Core::RunAsCPUThread and once through Core::RunOnCPUThread(..., true), and the
// s_load_or_save_in_progress check/set sits inside the first lambda only. This looks like two
// revisions of the function interleaved - confirm which statements are meant to remain.
void SaveAs(const std::string& filename, bool wait)
{
Core::RunAsCPUThread([&] {
// Measure the size of the buffer.
u8* ptr = nullptr;
PointerWrap p(&ptr, PointerWrap::MODE_MEASURE);
DoState(p);
const size_t buffer_size = reinterpret_cast<size_t>(ptr);

// Then actually do the write.
{
std::lock_guard<std::mutex> lk(g_cs_current_buffer);
g_current_buffer.resize(buffer_size);
ptr = &g_current_buffer[0];
p.SetMode(PointerWrap::MODE_WRITE);
DoState(p);
}

// The mode is re-checked after DoState; anything other than MODE_WRITE is reported as an error.
if (p.GetMode() == PointerWrap::MODE_WRITE)
{
Core::DisplayMessage("Saving State...", 1000);
// NOTE(review): this return only leaves the lambda, not SaveAs itself - confirm intent.
if (s_load_or_save_in_progress)
return;

CompressAndDumpState_args save_args;
save_args.buffer_vector = &g_current_buffer;
save_args.buffer_mutex = &g_cs_current_buffer;
save_args.filename = filename;
save_args.wait = wait;
s_load_or_save_in_progress = true;

Flush();
g_save_thread = std::thread(CompressAndDumpState, save_args);
g_compressAndDumpStateSyncEvent.Wait();
}
else
{
// someone aborted the save by changing the mode?
Core::DisplayMessage("Unable to save: Internal DoState Error", 4000);
}
});
Core::RunOnCPUThread(
[&] {
// Measure the size of the buffer.
u8* ptr = nullptr;
PointerWrap p(&ptr, PointerWrap::MODE_MEASURE);
DoState(p);
const size_t buffer_size = reinterpret_cast<size_t>(ptr);

// Then actually do the write.
{
std::lock_guard<std::mutex> lk(g_cs_current_buffer);
g_current_buffer.resize(buffer_size);
ptr = &g_current_buffer[0];
p.SetMode(PointerWrap::MODE_WRITE);
DoState(p);
}

// The mode is re-checked after DoState; anything other than MODE_WRITE is reported as an error.
if (p.GetMode() == PointerWrap::MODE_WRITE)
{
Core::DisplayMessage("Saving State...", 1000);

CompressAndDumpState_args save_args;
save_args.buffer_vector = &g_current_buffer;
save_args.buffer_mutex = &g_cs_current_buffer;
save_args.filename = filename;
save_args.wait = wait;

Flush();
g_save_thread = std::thread(CompressAndDumpState, save_args);
g_compressAndDumpStateSyncEvent.Wait();
}
else
{
// someone aborted the save by changing the mode?
Core::DisplayMessage("Unable to save: Internal DoState Error", 4000);
}
},
true);

// Clear the in-progress flag once both dispatched calls have been issued.
s_load_or_save_in_progress = false;
}

bool ReadHeader(const std::string& filename, StateHeader& header)
@@ -515,7 +531,7 @@ static void LoadFileStateData(const std::string& filename, std::vector<u8>& ret_

void LoadAs(const std::string& filename)
{
if (!Core::IsRunning())
if (!Core::IsRunning() || s_load_or_save_in_progress)
{
return;
}
@@ -525,64 +541,65 @@ void LoadAs(const std::string& filename)
return;
}

Core::RunAsCPUThread([&] {
g_loadDepth++;

// Save temp buffer for undo load state
if (!Movie::IsJustStartingRecordingInputFromSaveState())
{
std::lock_guard<std::mutex> lk(g_cs_undo_load_buffer);
SaveToBuffer(g_undo_load_buffer);
if (Movie::IsMovieActive())
Movie::SaveRecording(File::GetUserPath(D_STATESAVES_IDX) + "undo.dtm");
else if (File::Exists(File::GetUserPath(D_STATESAVES_IDX) + "undo.dtm"))
File::Delete(File::GetUserPath(D_STATESAVES_IDX) + "undo.dtm");
}

bool loaded = false;
bool loadedSuccessfully = false;

// brackets here are so buffer gets freed ASAP
{
std::vector<u8> buffer;
LoadFileStateData(filename, buffer);

if (!buffer.empty())
{
u8* ptr = &buffer[0];
PointerWrap p(&ptr, PointerWrap::MODE_READ);
DoState(p);
loaded = true;
loadedSuccessfully = (p.GetMode() == PointerWrap::MODE_READ);
}
}

if (loaded)
{
if (loadedSuccessfully)
{
Core::DisplayMessage(StringFromFormat("Loaded state from %s", filename.c_str()), 2000);
if (File::Exists(filename + ".dtm"))
Movie::LoadInput(filename + ".dtm");
else if (!Movie::IsJustStartingRecordingInputFromSaveState() &&
!Movie::IsJustStartingPlayingInputFromSaveState())
Movie::EndPlayInput(false);
}
else
{
Core::DisplayMessage("The savestate could not be loaded", OSD::Duration::NORMAL);

// since we could be in an inconsistent state now (and might crash or whatever), undo.
if (g_loadDepth < 2)
UndoLoadState();
}
}

if (s_on_after_load_callback)
s_on_after_load_callback();

g_loadDepth--;
});
s_load_or_save_in_progress = true;

Core::RunOnCPUThread(
[&] {
// Save temp buffer for undo load state
if (!Movie::IsJustStartingRecordingInputFromSaveState())
{
std::lock_guard<std::mutex> lk(g_cs_undo_load_buffer);
SaveToBuffer(g_undo_load_buffer);
if (Movie::IsMovieActive())
Movie::SaveRecording(File::GetUserPath(D_STATESAVES_IDX) + "undo.dtm");
else if (File::Exists(File::GetUserPath(D_STATESAVES_IDX) + "undo.dtm"))
File::Delete(File::GetUserPath(D_STATESAVES_IDX) + "undo.dtm");
}

bool loaded = false;
bool loadedSuccessfully = false;

// brackets here are so buffer gets freed ASAP
{
std::vector<u8> buffer;
LoadFileStateData(filename, buffer);

if (!buffer.empty())
{
u8* ptr = &buffer[0];
PointerWrap p(&ptr, PointerWrap::MODE_READ);
DoState(p);
loaded = true;
loadedSuccessfully = (p.GetMode() == PointerWrap::MODE_READ);
}
}

if (loaded)
{
if (loadedSuccessfully)
{
Core::DisplayMessage(StringFromFormat("Loaded state from %s", filename.c_str()), 2000);
if (File::Exists(filename + ".dtm"))
Movie::LoadInput(filename + ".dtm");
else if (!Movie::IsJustStartingRecordingInputFromSaveState() &&
!Movie::IsJustStartingPlayingInputFromSaveState())
Movie::EndPlayInput(false);
}
else
{
Core::DisplayMessage("The savestate could not be loaded", OSD::Duration::NORMAL);

// since we could be in an inconsistent state now (and might crash or whatever), undo.
UndoLoadState();
}
}

if (s_on_after_load_callback)
s_on_after_load_callback();
},
true);

s_load_or_save_in_progress = false;
}

void SetOnAfterLoadCallback(AfterLoadCallbackFunc callback)
@@ -100,10 +100,13 @@ void HacksWidget::CreateWidgets()
m_disable_bounding_box =
new GraphicsBool(tr("Disable Bounding Box"), Config::GFX_HACK_BBOX_ENABLE, true);
m_vertex_rounding = new GraphicsBool(tr("Vertex Rounding"), Config::GFX_HACK_VERTEX_ROUDING);
m_save_texture_cache_state =
new GraphicsBool(tr("Save Texture Cache to State"), Config::GFX_SAVE_TEXTURE_CACHE_TO_STATE);

other_layout->addWidget(m_fast_depth_calculation, 0, 0);
other_layout->addWidget(m_disable_bounding_box, 0, 1);
other_layout->addWidget(m_vertex_rounding, 1, 0);
other_layout->addWidget(m_save_texture_cache_state, 1, 1);

main_layout->addWidget(efb_box);
main_layout->addWidget(texture_cache_box);
@@ -244,6 +247,10 @@ void HacksWidget::AddDescriptions()
static const char TR_DISABLE_BOUNDINGBOX_DESCRIPTION[] =
QT_TR_NOOP("Disables bounding box emulation.\n\nThis may improve GPU performance "
"significantly, but some games will break.\n\nIf unsure, leave this checked.");
static const char TR_SAVE_TEXTURE_CACHE_TO_STATE_DESCRIPTION[] = QT_TR_NOOP(
"Includes the contents of the embedded frame buffer (EFB) and upscaled EFB copies "
"in save states. Fixes missing and/or non-upscaled textures/objects when loading "
"states at the cost of additional save/load time.\n\nIf unsure, leave this checked.");
static const char TR_VERTEX_ROUNDING_DESCRIPTION[] =
QT_TR_NOOP("Rounds 2D vertices to whole pixels.\n\nFixes graphical problems in some games at "
"higher internal resolutions. This setting has no effect when native internal "
@@ -259,6 +266,7 @@ void HacksWidget::AddDescriptions()
AddDescription(m_gpu_texture_decoding, TR_GPU_DECODING_DESCRIPTION);
AddDescription(m_fast_depth_calculation, TR_FAST_DEPTH_CALC_DESCRIPTION);
AddDescription(m_disable_bounding_box, TR_DISABLE_BOUNDINGBOX_DESCRIPTION);
AddDescription(m_save_texture_cache_state, TR_SAVE_TEXTURE_CACHE_TO_STATE_DESCRIPTION);
AddDescription(m_vertex_rounding, TR_VERTEX_ROUNDING_DESCRIPTION);
}

@@ -42,6 +42,7 @@ class HacksWidget final : public GraphicsWidget
QCheckBox* m_fast_depth_calculation;
QCheckBox* m_disable_bounding_box;
QCheckBox* m_vertex_rounding;
QCheckBox* m_save_texture_cache_state;
QCheckBox* m_defer_efb_copies;

void CreateWidgets();
@@ -11,6 +11,7 @@
#include "VideoCommon/VertexManagerBase.h"
#include "VideoCommon/VideoBackendBase.h"
#include "VideoCommon/VideoCommon.h"
#include "VideoCommon/VideoState.h"

AsyncRequests AsyncRequests::s_singleton;

@@ -154,6 +155,10 @@ void AsyncRequests::HandleEvent(const AsyncRequests::Event& e)
case Event::PERF_QUERY:
g_perf_query->FlushResults();
break;

case Event::DO_SAVE_STATE:
VideoCommon_DoState(*e.do_save_state.p);
break;
}
}

@@ -13,6 +13,7 @@
#include "Common/Flag.h"

struct EfbPokeData;
class PointerWrap;

class AsyncRequests
{
@@ -28,6 +29,7 @@ class AsyncRequests
SWAP_EVENT,
BBOX_READ,
PERF_QUERY,
DO_SAVE_STATE,
} type;
u64 time;

@@ -64,6 +66,11 @@ class AsyncRequests
struct
{
} perf_query;

struct
{
PointerWrap* p;
} do_save_state;
};
};

@@ -68,9 +68,6 @@ static void BPWritten(const BPCmd& bp)
----------------------------------------------------------------------------------------------------------------
*/

// check for invalid state, else unneeded configuration are built
g_video_backend->CheckInvalidState();

if (((s32*)&bpmem)[bp.address] == bp.newvalue)
{
if (!(bp.address == BPMEM_TRIGGER_EFB_COPY || bp.address == BPMEM_CLEARBBOX1 ||
@@ -299,14 +299,15 @@ void RunGpuLoop()
[] {
const SConfig& param = SConfig::GetInstance();

// Run events from the CPU thread.
AsyncRequests::GetInstance()->PullEvents();

// Do nothing while paused
if (!s_emu_running_state.IsSet())
return;

if (s_use_deterministic_gpu_thread)
{
AsyncRequests::GetInstance()->PullEvents();

// All the fifo/CP stuff is on the CPU. We just need to run the opcode decoder.
u8* seen_ptr = s_video_buffer_seen_ptr;
u8* write_ptr = s_video_buffer_write_ptr;
@@ -321,9 +322,6 @@ void RunGpuLoop()
else
{
CommandProcessor::SCPFifoStruct& fifo = CommandProcessor::fifo;

AsyncRequests::GetInstance()->PullEvents();

CommandProcessor::SetCPStatusFromGPU();

// check if we are able to run this buffer
@@ -7,8 +7,10 @@
#include "VideoCommon/FramebufferShaderGen.h"
#include "VideoCommon/VertexManagerBase.h"

#include "Common/ChunkFile.h"
#include "Common/Logging/Log.h"
#include "Common/MsgHandler.h"
#include "Core/Config/GraphicsSettings.h"
#include "VideoCommon/AbstractFramebuffer.h"
#include "VideoCommon/AbstractPipeline.h"
#include "VideoCommon/AbstractShader.h"
@@ -464,6 +466,20 @@ bool FramebufferManager::CompileReadbackPipelines()
return false;
}

// EFB restore pipeline
auto restore_shader = g_renderer->CreateShaderFromSource(
ShaderStage::Pixel, FramebufferShaderGen::GenerateEFBRestorePixelShader());
if (!restore_shader)
return false;

config.framebuffer_state = GetEFBFramebufferState();
config.framebuffer_state.per_sample_shading = false;
config.vertex_shader = g_shader_cache->GetScreenQuadVertexShader();
config.pixel_shader = restore_shader.get();
m_efb_restore_pipeline = g_renderer->CreatePipeline(config);
if (!m_efb_restore_pipeline)
return false;

return true;
}

@@ -842,3 +858,107 @@ void FramebufferManager::DestroyPokePipelines()
m_color_poke_pipeline.reset();
m_poke_vertex_format.reset();
}

// Save state entry point for the EFB.
// Flushes pending EFB pokes, records whether EFB contents are part of this state, and then
// hands off to DoSaveState (write/measure modes) or DoLoadState (any other mode).
void FramebufferManager::DoState(PointerWrap& p)
{
  FlushEFBPokes();

  // The flag is seeded from the config and passed through the state stream.
  bool efb_in_state = Config::Get(Config::GFX_SAVE_TEXTURE_CACHE_TO_STATE);
  p.Do(efb_in_state);

  if (efb_in_state)
  {
    const auto mode = p.GetMode();
    const bool saving = mode == PointerWrap::MODE_WRITE || mode == PointerWrap::MODE_MEASURE;
    if (saving)
      DoSaveState(p);
    else
      DoLoadState(p);
  }
}

// Writes the EFB color and depth contents into the save state.
// Exactly two textures are always serialized (color, then depth) so that the load path, which
// deserializes two textures, stays in step with the stream even when depth conversion fails.
void FramebufferManager::DoSaveState(PointerWrap& p)
{
  // For multisampling, we need to resolve first before we can save.
  // This won't be bit-exact when loading, which could cause interesting rendering side-effects for
  // a frame. But whatever, MSAA doesn't exactly behave that well anyway.
  AbstractTexture* color_texture = ResolveEFBColorTexture(m_efb_color_texture->GetRect());
  AbstractTexture* depth_texture = ResolveEFBDepthTexture(m_efb_depth_texture->GetRect());

  // We don't want to save these as rendertarget textures, just the data itself when deserializing.
  const TextureConfig color_texture_config(color_texture->GetWidth(), color_texture->GetHeight(),
                                           color_texture->GetLevels(), color_texture->GetLayers(),
                                           1, GetEFBColorFormat(), 0);
  g_texture_cache->SerializeTexture(color_texture, color_texture_config, p);

  if (GetEFBDepthFormat() == AbstractTextureFormat::D32F)
  {
    const TextureConfig depth_texture_config(
        depth_texture->GetWidth(), depth_texture->GetHeight(), depth_texture->GetLevels(),
        depth_texture->GetLayers(), 1,
        AbstractTexture::GetColorFormatForDepthFormat(GetEFBDepthFormat()), 0);
    g_texture_cache->SerializeTexture(depth_texture, depth_texture_config, p);
  }
  else
  {
    // If the EFB is backed by a D24S8 texture, we first have to convert it to R32F.
    const TextureConfig temp_texture_config(depth_texture->GetWidth(), depth_texture->GetHeight(),
                                            depth_texture->GetLevels(), depth_texture->GetLayers(),
                                            1, AbstractTextureFormat::R32F,
                                            AbstractTextureFlag_RenderTarget);
    std::unique_ptr<AbstractTexture> temp_texture = g_renderer->CreateTexture(temp_texture_config);
    std::unique_ptr<AbstractFramebuffer> temp_fb =
        g_renderer->CreateFramebuffer(temp_texture.get(), nullptr);
    if (temp_texture && temp_fb)
    {
      g_renderer->ScaleTexture(temp_fb.get(), temp_texture->GetRect(), depth_texture,
                               depth_texture->GetRect());

      // Serialize the converted R32F copy, not the original depth texture: the config below
      // describes the temporary's format, so the data read back has to come from it as well.
      const TextureConfig depth_texture_config(
          depth_texture->GetWidth(), depth_texture->GetHeight(), depth_texture->GetLevels(),
          depth_texture->GetLayers(), 1, temp_texture->GetFormat(), 0);
      g_texture_cache->SerializeTexture(temp_texture.get(), depth_texture_config, p);
    }
    else
    {
      PanicAlert("Failed to create temp texture for depth saving");
      // Emit the color texture a second time as a stand-in so a second texture is still written.
      g_texture_cache->SerializeTexture(color_texture, color_texture_config, p);
    }
  }
}

// Restores the EFB from a save state.
// Reads the color and depth textures back from |p| and draws them over the EFB with the restore
// pipeline. If either texture is missing, or the layer count no longer matches the current EFB,
// the EFB is cleared instead.
void FramebufferManager::DoLoadState(PointerWrap& p)
{
  // Any cached peek tiles are stale once the EFB is replaced.
  InvalidatePeekCache(true);

  // Both textures are always consumed from the stream, even if one turns out unusable.
  auto saved_color = g_texture_cache->DeserializeTexture(p);
  auto saved_depth = g_texture_cache->DeserializeTexture(p);

  // A different layer count means the stereo mode changed since the state was made.
  const bool usable = saved_color && saved_depth &&
                      saved_color->texture->GetLayers() == m_efb_color_texture->GetLayers();
  if (!usable)
  {
    WARN_LOG(VIDEO, "Failed to deserialize EFB contents. Clearing instead.");
    g_renderer->SetAndClearFramebuffer(
        m_efb_framebuffer.get(), {{0.0f, 0.0f, 0.0f, 0.0f}},
        g_ActiveConfig.backend_info.bSupportsReversedDepthRange ? 1.0f : 0.0f);
    return;
  }

  // A size mismatch is tolerated: color is filtered linearly when rescaling, while depth is
  // always point sampled so that depth values are never interpolated.
  const bool same_width = saved_color->texture->GetWidth() == m_efb_color_texture->GetWidth();
  const bool same_height = saved_color->texture->GetHeight() == m_efb_color_texture->GetHeight();
  const bool rescale = !(same_width && same_height);

  // Draw the saved contents over the whole EFB.
  g_renderer->BeginUtilityDrawing();
  g_renderer->SetAndDiscardFramebuffer(m_efb_framebuffer.get());
  g_renderer->SetViewportAndScissor(m_efb_framebuffer->GetRect());
  g_renderer->SetPipeline(m_efb_restore_pipeline.get());
  g_renderer->SetTexture(0, saved_color->texture.get());
  g_renderer->SetTexture(1, saved_depth->texture.get());
  g_renderer->SetSamplerState(0, rescale ? RenderState::GetLinearSamplerState() :
                                           RenderState::GetPointSamplerState());
  g_renderer->SetSamplerState(1, RenderState::GetPointSamplerState());
  g_renderer->Draw(0, 3);
  g_renderer->EndUtilityDrawing();
}
@@ -17,6 +17,7 @@
#include "VideoCommon/TextureConfig.h"

class NativeVertexFormat;
class PointerWrap;

enum class EFBReinterpretType
{
@@ -95,6 +96,9 @@ class FramebufferManager final
void PokeEFBDepth(u32 x, u32 y, float depth);
void FlushEFBPokes();

// Save state load/save.
void DoState(PointerWrap& p);

protected:
struct EFBPokeVertex
{
@@ -145,6 +149,9 @@ class FramebufferManager final
void DrawPokeVertices(const EFBPokeVertex* vertices, u32 vertex_count,
const AbstractPipeline* pipeline);

void DoLoadState(PointerWrap& p);
void DoSaveState(PointerWrap& p);

std::unique_ptr<AbstractTexture> m_efb_color_texture;
std::unique_ptr<AbstractTexture> m_efb_convert_color_texture;
std::unique_ptr<AbstractTexture> m_efb_depth_texture;
@@ -156,6 +163,9 @@ class FramebufferManager final
std::unique_ptr<AbstractFramebuffer> m_efb_depth_resolve_framebuffer;
std::unique_ptr<AbstractPipeline> m_efb_depth_resolve_pipeline;

// Pipeline for restoring the contents of the EFB from a save state
std::unique_ptr<AbstractPipeline> m_efb_restore_pipeline;

// Format conversion shaders
std::array<std::unique_ptr<AbstractPipeline>, 6> m_format_conversion_pipelines;

@@ -644,4 +644,24 @@ std::string GenerateTextureReinterpretShader(TextureFormat from_format, TextureF
return ss.str();
}

// Builds the pixel shader source used to restore the EFB from a save state.
// The shader samples texture 0 into ocol0 and the red channel of texture 1 into the depth
// output; the depth output is declared and named differently on D3D than on other APIs.
std::string GenerateEFBRestorePixelShader()
{
  std::stringstream ss;
  EmitSamplerDeclarations(ss, 0, 2, false);

  const bool is_d3d = GetAPIType() == APIType::D3D;
  const char* const extra_output = is_d3d ? "out float depth : SV_Depth, " : "";
  const char* const depth_target = is_d3d ? "depth" : "gl_FragDepth";
  // Backends with a lower-left origin get the Y coordinate flipped.
  const char* const y_flip = g_ActiveConfig.backend_info.bUsesLowerLeftOrigin ? "1.0 - " : "";

  EmitPixelMainDeclaration(ss, 1, 0, "float4", extra_output);
  ss << "{\n";
  ss << " float3 coords = float3(v_tex0.x, " << y_flip << "v_tex0.y, v_tex0.z);\n";

  ss << " ocol0 = ";
  EmitSampleTexture(ss, 0, "coords");
  ss << ";\n";

  ss << " " << depth_target << " = ";
  EmitSampleTexture(ss, 1, "coords");
  ss << ".r;\n";

  ss << "}\n";
  return ss.str();
}

} // namespace FramebufferShaderGen
@@ -30,5 +30,6 @@ std::string GenerateEFBPokeVertexShader();
std::string GenerateColorPixelShader();
std::string GenerateFormatConversionShader(EFBReinterpretType convtype, u32 samples);
std::string GenerateTextureReinterpretShader(TextureFormat from_format, TextureFormat to_format);
std::string GenerateEFBRestorePixelShader();

} // namespace FramebufferShaderGen
@@ -25,6 +25,7 @@
#include <imgui.h>

#include "Common/Assert.h"
#include "Common/ChunkFile.h"
#include "Common/CommonTypes.h"
#include "Common/Config/Config.h"
#include "Common/Event.h"
@@ -1324,8 +1325,11 @@ void Renderer::Swap(u32 xfb_addr, u32 fb_width, u32 fb_stride, u32 fb_height, u6
}

// Update our last xfb values
m_last_xfb_width = (fb_width < 1 || fb_width > MAX_XFB_WIDTH) ? MAX_XFB_WIDTH : fb_width;
m_last_xfb_height = (fb_height < 1 || fb_height > MAX_XFB_HEIGHT) ? MAX_XFB_HEIGHT : fb_height;
m_last_xfb_addr = xfb_addr;
m_last_xfb_ticks = ticks;
m_last_xfb_width = fb_width;
m_last_xfb_stride = fb_stride;
m_last_xfb_height = fb_height;
}
else
{
@@ -1681,6 +1685,27 @@ bool Renderer::UseVertexDepthRange() const
return fabs(xfmem.viewport.zRange) > 16777215.0f || fabs(xfmem.viewport.farZ) > 16777215.0f;
}

// Saves or restores the renderer's frame bookkeeping (aspect flag, frame count, previous EFB
// format and the parameters of the last XFB). After a read, the last XFB is presented again so
// the display reflects the loaded state.
void Renderer::DoState(PointerWrap& p)
{
  p.Do(m_aspect_wide);
  p.Do(m_frame_count);
  p.Do(m_prev_efb_format);
  p.Do(m_last_xfb_ticks);
  p.Do(m_last_xfb_addr);
  p.Do(m_last_xfb_width);
  p.Do(m_last_xfb_stride);
  p.Do(m_last_xfb_height);

  // Nothing more to do unless we are loading.
  if (p.GetMode() != PointerWrap::MODE_READ)
    return;

  // Resetting the ID to the max value forces the next xfb to be displayed ...
  m_last_xfb_id = std::numeric_limits<u64>::max();

  // ... and this actually displays it, using the values just loaded.
  Swap(m_last_xfb_addr, m_last_xfb_width, m_last_xfb_stride, m_last_xfb_height, m_last_xfb_ticks);
}

std::unique_ptr<VideoCommon::AsyncShaderCompiler> Renderer::CreateAsyncShaderCompiler()
{
return std::make_unique<VideoCommon::AsyncShaderCompiler>();
@@ -41,6 +41,7 @@ class AbstractTexture;
class AbstractStagingTexture;
class NativeVertexFormat;
class NetPlayChatUI;
class PointerWrap;
struct TextureConfig;
struct ComputePipelineConfig;
struct AbstractPipelineConfig;
@@ -237,6 +238,7 @@ class Renderer
void ChangeSurface(void* new_surface_handle);
void ResizeSurface();
bool UseVertexDepthRange() const;
void DoState(PointerWrap& p);

virtual std::unique_ptr<VideoCommon::AsyncShaderCompiler> CreateAsyncShaderCompiler();

@@ -356,9 +358,10 @@ class Renderer

// Tracking of XFB textures so we don't render duplicate frames.
u64 m_last_xfb_id = std::numeric_limits<u64>::max();

// Note: Only used for auto-ir
u64 m_last_xfb_ticks = 0;
u32 m_last_xfb_addr = 0;
u32 m_last_xfb_width = 0;
u32 m_last_xfb_stride = 0;
u32 m_last_xfb_height = 0;

// NOTE: The methods below are called on the framedumping thread.
@@ -15,6 +15,7 @@

#include "Common/Align.h"
#include "Common/Assert.h"
#include "Common/ChunkFile.h"
#include "Common/CommonTypes.h"
#include "Common/FileUtil.h"
#include "Common/Hash.h"
@@ -23,6 +24,7 @@
#include "Common/MemoryUtil.h"
#include "Common/StringUtil.h"

#include "Core/Config/GraphicsSettings.h"
#include "Core/ConfigManager.h"
#include "Core/FifoPlayer/FifoPlayer.h"
#include "Core/FifoPlayer/FifoRecorder.h"
@@ -404,6 +406,329 @@ void TextureCacheBase::ScaleTextureCacheEntryTo(TextureCacheBase::TCacheEntry* e
config, TexPoolEntry(std::move(new_texture->texture), std::move(new_texture->framebuffer)));
}

// Ensures m_readback_texture can hold a width x height readback in the given format.
// An existing staging texture is reused when it is at least that large and has the same format;
// otherwise it is replaced with a new one of at least 128x128. Returns false when the staging
// texture could not be created.
bool TextureCacheBase::CheckReadbackTexture(u32 width, u32 height, AbstractTextureFormat format)
{
  if (m_readback_texture)
  {
    const auto& current = m_readback_texture->GetConfig();
    const bool large_enough = current.width >= width && current.height >= height;
    if (large_enough && current.format == format)
      return true;
  }

  TextureConfig staging_config(std::max(width, 128u), std::max(height, 128u), 1, 1, 1, format, 0);

  // Drop the old texture before creating its replacement.
  m_readback_texture.reset();
  m_readback_texture =
      g_renderer->CreateStagingTexture(StagingTextureType::Readback, staging_config);
  return m_readback_texture != nullptr;
}

// Writes |config| followed by the texel data of |tex| to |p|.
// All layers and mip levels are packed back to back into one byte vector (layer-major, then
// level). In measure mode the vector is sized but no GPU readback is performed. If the staging
// texture cannot be set up, an alert is raised and an empty data vector is written.
void TextureCacheBase::SerializeTexture(AbstractTexture* tex, const TextureConfig& config,
                                        PointerWrap& p)
{
  // Measuring only needs the sizes, so the actual readback is skipped to save some time.
  const bool measuring = p.GetMode() == PointerWrap::MODE_MEASURE;
  p.DoPOD(config);

  std::vector<u8> packed_data;
  if (!measuring && !CheckReadbackTexture(config.width, config.height, config.format))
  {
    PanicAlert("Failed to create staging texture for serialization");
  }
  else
  {
    for (u32 layer = 0; layer < config.layers; layer++)
    {
      for (u32 level = 0; level < config.levels; level++)
      {
        const u32 mip_width = std::max(config.width >> level, 1u);
        const u32 mip_height = std::max(config.height >> level, 1u);
        auto mip_rect = tex->GetConfig().GetMipRect(level);

        // Copy this sub-image into the staging texture.
        if (!measuring)
          m_readback_texture->CopyFromTexture(tex, mip_rect, layer, level, mip_rect);

        // Grow the output by one sub-image and remember where it begins.
        const size_t row_stride = AbstractTexture::CalculateStrideForFormat(config.format, mip_width);
        const size_t offset = packed_data.size();
        packed_data.resize(offset + row_stride * mip_height);

        // Read the staged texels into the newly added region.
        if (!measuring)
        {
          m_readback_texture->ReadTexels(mip_rect, packed_data.data() + offset,
                                         static_cast<u32>(row_stride));
        }
      }
    }
  }

  p.Do(packed_data);
}

// Reads a texture config and its packed texel data from |p| and rebuilds the texture.
// Returns std::nullopt when not in read mode, when no data was stored, or when the texture
// cannot be allocated. The config and data are always consumed from the stream first. If the
// stored data runs out early, the partially loaded texture is returned after logging an error.
std::optional<TextureCacheBase::TexPoolEntry> TextureCacheBase::DeserializeTexture(PointerWrap& p)
{
  TextureConfig config;
  p.Do(config);

  std::vector<u8> packed_data;
  p.Do(packed_data);

  const bool reading = p.GetMode() == PointerWrap::MODE_READ;
  if (!reading || packed_data.empty())
    return std::nullopt;

  auto tex = AllocateTexture(config);
  if (!tex)
  {
    PanicAlert("Failed to create texture for deserialization");
    return std::nullopt;
  }

  // Sub-images are stored back to back, layer-major and then by level.
  size_t offset = 0;
  for (u32 layer = 0; layer < config.layers; layer++)
  {
    for (u32 level = 0; level < config.levels; level++)
    {
      const u32 mip_width = std::max(config.width >> level, 1u);
      const u32 mip_height = std::max(config.height >> level, 1u);
      const size_t row_stride = AbstractTexture::CalculateStrideForFormat(config.format, mip_width);
      const size_t mip_size = row_stride * mip_height;

      const size_t end = offset + mip_size;
      if (end > packed_data.size())
      {
        ERROR_LOG(VIDEO, "Insufficient texture data for layer %u level %u", layer, level);
        return tex;
      }

      tex->texture->Load(level, mip_width, mip_height, mip_width, packed_data.data() + offset,
                         mip_size);
      offset = end;
    }
  }

  return tex;
}

// Save state entry point for the texture cache.
// Flushes pending copies, passes last_entry_id through the stream, and then dispatches to
// DoSaveState (write/measure modes) or DoLoadState (any other mode).
void TextureCacheBase::DoState(PointerWrap& p)
{
  // Pending copies must be flushed before either loading or saving.
  FlushEFBCopies();

  p.Do(last_entry_id);

  const auto mode = p.GetMode();
  const bool saving = mode == PointerWrap::MODE_WRITE || mode == PointerWrap::MODE_MEASURE;
  if (saving)
    DoSaveState(p);
  else
    DoLoadState(p);
}

// Writes the texture cache to the save state.
// Stream layout: entry count, each entry (texture data followed by its metadata), a marker, the
// list of reference pairs, the address -> entry ID list, and the hash -> entry ID list.
// Entries are only collected when GFX_SAVE_TEXTURE_CACHE_TO_STATE is enabled; otherwise every
// list is written empty.
void TextureCacheBase::DoSaveState(PointerWrap& p)
{
  // Entries are numbered sequentially as they are first seen. The map gives fast lookup; the
  // vector keeps the entries in ID order, which is the order they must be written in, since
  // both the address and hash maps are walked.
  std::map<const TCacheEntry*, u32> ids_by_entry;
  std::vector<TCacheEntry*> ordered_entries;
  const auto assign_id = [&ids_by_entry, &ordered_entries](TCacheEntry* entry) -> u32 {
    const auto found = ids_by_entry.find(entry);
    if (found != ids_by_entry.end())
      return found->second;

    const u32 new_id = static_cast<u32>(ids_by_entry.size());
    ids_by_entry.emplace(entry, new_id);
    ordered_entries.push_back(entry);
    return new_id;
  };

  // Turn textures_by_address and textures_by_hash into address/hash -> entry ID lists.
  // Non-copies are skipped as they can be decoded from RAM when the state is loaded; storing
  // them would duplicate data in the save state file, adding to decompression time.
  std::vector<std::pair<u32, u32>> address_links;
  std::vector<std::pair<u64, u32>> hash_links;
  if (Config::Get(Config::GFX_SAVE_TEXTURE_CACHE_TO_STATE))
  {
    for (const auto& [address, entry] : textures_by_address)
    {
      if (!entry->IsCopy())
        continue;

      const u32 id = assign_id(entry);
      address_links.emplace_back(address, id);
    }
    for (const auto& [hash, entry] : textures_by_hash)
    {
      if (!entry->IsCopy())
        continue;

      const u32 id = assign_id(entry);
      hash_links.emplace_back(hash, id);
    }
  }

  // Save the cache entries out in the order they were referenced.
  u32 entry_count = static_cast<u32>(ordered_entries.size());
  p.Do(entry_count);
  for (TCacheEntry* entry : ordered_entries)
  {
    g_texture_cache->SerializeTexture(entry->texture.get(), entry->texture->GetConfig(), p);
    entry->DoState(p);
  }
  p.DoMarker("TextureCacheEntries");

  // Collect references between saved entries. As references are circular, everything has to be
  // created before entries can be linked, so they are stored as a separate list of ID pairs.
  // Each linked pair is recorded once, whichever direction it is found in first.
  std::set<std::pair<u32, u32>> reference_pairs;
  for (const auto& [entry, entry_id] : ids_by_entry)
  {
    for (const TCacheEntry* referenced_entry : entry->references)
    {
      const auto referenced = ids_by_entry.find(referenced_entry);
      if (referenced == ids_by_entry.end())
        continue;

      const u32 referenced_id = referenced->second;
      if (reference_pairs.count(std::make_pair(referenced_id, entry_id)) == 0)
        reference_pairs.insert(std::make_pair(entry_id, referenced_id));
    }
  }

  u32 reference_count = static_cast<u32>(reference_pairs.size());
  p.Do(reference_count);
  for (const auto& [first_id, second_id] : reference_pairs)
  {
    p.Do(first_id);
    p.Do(second_id);
  }

  u32 address_count = static_cast<u32>(address_links.size());
  p.Do(address_count);
  for (const auto& [address, id] : address_links)
  {
    p.Do(address);
    p.Do(id);
  }

  u32 hash_count = static_cast<u32>(hash_links.size());
  p.Do(hash_count);
  for (const auto& [hash, id] : hash_links)
  {
    p.Do(hash);
    p.Do(id);
  }

  // Free the readback texture to potentially save host-mapped GPU memory, depending on where
  // the driver mapped the staging buffer.
  m_readback_texture.reset();
}

// Reads the texture cache back from the save state.
// Consumes, in order: the cache entries (texture data plus metadata), the reference pairs, the
// address -> entry ID list and the hash -> entry ID list. Entries are only committed to the
// cache in MODE_READ; in any other mode the stream is still walked but every entry is discarded.
void TextureCacheBase::DoLoadState(PointerWrap& p)
{
  // Helper for getting a cache entry from an ID.
  std::map<u32, TCacheEntry*> id_map;
  auto GetEntry = [&id_map](u32 id) {
    auto iter = id_map.find(id);
    return iter == id_map.end() ? nullptr : iter->second;
  };

  // Only clear out state when actually restoring/loading.
  // Since we throw away entries when not in loading mode now, we don't need to check
  // before inserting entries into the cache, as GetEntry will always return null.
  const bool commit_state = p.GetMode() == PointerWrap::MODE_READ;
  if (commit_state)
    Invalidate();

  // Preload all cache entries.
  u32 size = 0;
  p.Do(size);
  for (u32 i = 0; i < size; i++)
  {
    // DeserializeTexture returns an empty optional when no texture could be produced, so it
    // must be checked before use. In that case the entry is built with null texture/framebuffer.
    auto tex = g_texture_cache->DeserializeTexture(p);
    std::unique_ptr<AbstractTexture> texture;
    std::unique_ptr<AbstractFramebuffer> framebuffer;
    if (tex)
    {
      texture = std::move(tex->texture);
      framebuffer = std::move(tex->framebuffer);
    }

    // Even if the texture isn't valid, we still need to create the cache entry object
    // to advance the read position in the save state. We'll just throw it away if it's invalid.
    std::unique_ptr<TCacheEntry> entry(
        new TCacheEntry(std::move(texture), std::move(framebuffer)));
    entry->textures_by_hash_iter = g_texture_cache->textures_by_hash.end();
    entry->DoState(p);

    // Ownership passes to the cache maps via id_map; otherwise the entry is freed here.
    if (entry->texture && commit_state)
      id_map.emplace(i, entry.release());
  }
  p.DoMarker("TextureCacheEntries");

  // Link all cache entry references.
  p.Do(size);
  for (u32 i = 0; i < size; i++)
  {
    u32 id1 = 0, id2 = 0;
    p.Do(id1);
    p.Do(id2);
    TCacheEntry* e1 = GetEntry(id1);
    TCacheEntry* e2 = GetEntry(id2);
    if (e1 && e2)
      e1->CreateReference(e2);
  }

  // Fill in address map.
  p.Do(size);
  for (u32 i = 0; i < size; i++)
  {
    u32 addr = 0;
    u32 id = 0;
    p.Do(addr);
    p.Do(id);

    TCacheEntry* entry = GetEntry(id);
    if (entry)
      textures_by_address.emplace(addr, entry);
  }

  // Fill in hash map.
  p.Do(size);
  for (u32 i = 0; i < size; i++)
  {
    u64 hash = 0;
    u32 id = 0;
    p.Do(hash);
    p.Do(id);

    TCacheEntry* entry = GetEntry(id);
    if (entry)
      entry->textures_by_hash_iter = textures_by_hash.emplace(hash, entry);
  }
}

// Passes this cache entry's metadata fields through |p|, one p.Do() call per field.
// Only the plain fields listed below are handled here; the entry's texture is not.
// NOTE(review): presumably the call order defines the layout inside the save state, so adding,
// removing or reordering fields would need a STATE_VERSION bump - confirm.
void TextureCacheBase::TCacheEntry::DoState(PointerWrap& p)
{
p.Do(addr);
p.Do(size_in_bytes);
p.Do(base_hash);
p.Do(hash);
p.Do(format);
p.Do(memory_stride);
p.Do(is_efb_copy);
p.Do(is_custom_tex);
p.Do(may_have_overlapping_textures);
p.Do(tmem_only);
p.Do(has_arbitrary_mips);
p.Do(should_force_safe_hashing);
p.Do(is_xfb_copy);
p.Do(is_xfb_container);
p.Do(id);
p.Do(reference_changed);
p.Do(native_width);
p.Do(native_height);
p.Do(native_levels);
p.Do(frameCount);
}

TextureCacheBase::TCacheEntry*
TextureCacheBase::DoPartialTextureUpdates(TCacheEntry* entry_to_update, u8* palette,
TLUTFormat tlutfmt)
@@ -24,6 +24,7 @@

class AbstractFramebuffer;
class AbstractStagingTexture;
class PointerWrap;
struct VideoConfig;

struct TextureAndTLUTFormat
@@ -185,6 +186,17 @@ class TextureCacheBase
u32 GetNumLevels() const { return texture->GetConfig().levels; }
u32 GetNumLayers() const { return texture->GetConfig().layers; }
AbstractTextureFormat GetFormat() const { return texture->GetConfig().format; }
void DoState(PointerWrap& p);
};

// Minimal version of TCacheEntry just for TexPool.
// Bundles a texture with its framebuffer and a frame counter, without any of the
// address/hash bookkeeping a TCacheEntry serializes in its DoState().
struct TexPoolEntry
{
// Texture object owned by this entry.
std::unique_ptr<AbstractTexture> texture;
// Framebuffer owned by this entry and kept together with the texture.
std::unique_ptr<AbstractFramebuffer> framebuffer;
// Defaults to FRAMECOUNT_INVALID.
// NOTE(review): presumably the frame in which the entry was last used -- confirm.
int frameCount = FRAMECOUNT_INVALID;

// Both pointers are taken by value, so the caller hands over ownership of the objects.
TexPoolEntry(std::unique_ptr<AbstractTexture> tex, std::unique_ptr<AbstractFramebuffer> fb);
};

TextureCacheBase();
@@ -224,6 +236,13 @@ class TextureCacheBase
// Flushes all pending EFB copies to emulated RAM.
void FlushEFBCopies();

// Texture Serialization
void SerializeTexture(AbstractTexture* tex, const TextureConfig& config, PointerWrap& p);
std::optional<TexPoolEntry> DeserializeTexture(PointerWrap& p);

// Save States
void DoState(PointerWrap& p);

// Returns false if the top/bottom row coefficients are zero.
static bool NeedsCopyFilterInShader(const EFBCopyFilterCoefficients& coefficients);

@@ -256,15 +275,6 @@ class TextureCacheBase
static std::bitset<8> valid_bind_points;

private:
// Minimal version of TCacheEntry just for TexPool.
// Holds only a texture, its framebuffer and a frame counter.
struct TexPoolEntry
{
// Texture object owned by this entry.
std::unique_ptr<AbstractTexture> texture;
// Framebuffer owned by this entry and kept together with the texture.
std::unique_ptr<AbstractFramebuffer> framebuffer;
// Defaults to FRAMECOUNT_INVALID.
// NOTE(review): presumably the frame in which the entry was last used -- confirm.
int frameCount = FRAMECOUNT_INVALID;

// Both pointers are taken by value, so the caller hands over ownership of the objects.
TexPoolEntry(std::unique_ptr<AbstractTexture> tex, std::unique_ptr<AbstractFramebuffer> fb);
};
using TexAddrCache = std::multimap<u32, TCacheEntry*>;
using TexHashCache = std::multimap<u64, TCacheEntry*>;
using TexPool = std::unordered_multimap<TextureConfig, TexPoolEntry>;
@@ -319,6 +329,10 @@ class TextureCacheBase
// Returns an EFB copy staging texture to the pool, so it can be re-used.
void ReleaseEFBCopyStagingTexture(std::unique_ptr<AbstractStagingTexture> tex);

bool CheckReadbackTexture(u32 width, u32 height, AbstractTextureFormat format);
void DoSaveState(PointerWrap& p);
void DoLoadState(PointerWrap& p);

TexAddrCache textures_by_address;
TexHashCache textures_by_hash;
TexPool texture_pool;
@@ -354,6 +368,11 @@ class TextureCacheBase
// List of pending EFB copies. It is important that the order is preserved for these,
// so that overlapping textures are written to guest RAM in the order they are issued.
std::vector<TCacheEntry*> m_pending_efb_copies;

// Staging texture used for readbacks.
// We store this in the class so that the same staging texture can be used for multiple
// readbacks, saving the overhead of allocating a new buffer every time.
std::unique_ptr<AbstractStagingTexture> m_readback_texture;
};

extern std::unique_ptr<TextureCacheBase> g_texture_cache;
@@ -338,9 +338,6 @@ void VertexManagerBase::Flush()

m_is_flushed = true;

// loading a state will invalidate BP, so check for it
g_video_backend->CheckInvalidState();

#if defined(_DEBUG) || defined(DEBUGFAST)
PRIM_LOG("frame%d:\n texgen=%u, numchan=%u, dualtex=%u, ztex=%u, cole=%u, alpe=%u, ze=%u",
g_ActiveConfig.iSaveTargetId, xfmem.numTexGen.numTexGens, xfmem.numChan.numColorChans,
@@ -464,6 +461,16 @@ void VertexManagerBase::Flush()

// Save-state hook for the vertex manager: prepares for a load if needed, then
// serializes/deserializes m_zslope through `p`.
void VertexManagerBase::DoState(PointerWrap& p)
{
  const bool restoring_state = p.GetMode() == PointerWrap::MODE_READ;
  if (restoring_state)
  {
    // Push out any vertex data that is still pending before the loaded state takes over.
    Flush();

    // Drop every cache that depends on RAM contents.
    // (These do not seem to hold emulation state that is part of the save, which is why this
    // only happens when loading.)
    VertexLoaderManager::MarkAllDirty();
  }

  p.Do(m_zslope);
}

@@ -40,6 +40,7 @@
#include "VideoCommon/RenderBase.h"
#include "VideoCommon/TextureCacheBase.h"
#include "VideoCommon/VertexLoaderManager.h"
#include "VideoCommon/VertexManagerBase.h"
#include "VideoCommon/VertexShaderManager.h"
#include "VideoCommon/VideoCommon.h"
#include "VideoCommon/VideoConfig.h"
@@ -236,41 +237,22 @@ void VideoBackendBase::PopulateBackendInfo()
g_Config.Refresh();
}

// Run from the CPU thread
void VideoBackendBase::DoState(PointerWrap& p)
{
bool software = false;
p.Do(software);

if (p.GetMode() == PointerWrap::MODE_READ && software == true)
{
// change mode to abort load of incompatible save state.
p.SetMode(PointerWrap::MODE_VERIFY);
}

VideoCommon_DoState(p);
p.DoMarker("VideoCommon");

// Refresh state.
if (p.GetMode() == PointerWrap::MODE_READ)
if (!SConfig::GetInstance().bCPUThread)
{
m_invalid = true;

// Clear all caches that touch RAM
// (? these don't appear to touch any emulation state that gets saved. moved to on load only.)
VertexLoaderManager::MarkAllDirty();
VideoCommon_DoState(p);
return;
}
}

void VideoBackendBase::CheckInvalidState()
{
if (m_invalid)
{
m_invalid = false;
AsyncRequests::Event ev = {};
ev.do_save_state.p = &p;
ev.type = AsyncRequests::Event::DO_SAVE_STATE;
AsyncRequests::GetInstance()->PushEvent(ev, true);

BPReload();
g_texture_cache->Invalidate();
}
// Let the GPU thread sleep after loading the state, so we're not spinning if paused after loading
// a state. The next GP burst will wake it up again.
Fifo::GpuMaySleep();
}

void VideoBackendBase::InitializeShared()
@@ -282,8 +264,6 @@ void VideoBackendBase::InitializeShared()
// do not initialize again for the config window
m_initialized = true;

m_invalid = false;

CommandProcessor::Init();
Fifo::Init();
OpcodeDecoder::Init();
@@ -63,18 +63,14 @@ class VideoBackendBase
// Called by the UI thread when the graphics config is opened.
static void PopulateBackendInfo();

// The implementation need not do any synchronization, because calls to it are surrounded by
// PauseAndLock now.
// Wrapper function which pushes the event to the GPU thread.
void DoState(PointerWrap& p);

void CheckInvalidState();

protected:
void InitializeShared();
void ShutdownShared();

bool m_initialized = false;
bool m_invalid = false;
};

extern std::vector<std::unique_ptr<VideoBackendBase>> g_available_video_backends;
@@ -10,9 +10,12 @@
#include "VideoCommon/CPMemory.h"
#include "VideoCommon/CommandProcessor.h"
#include "VideoCommon/Fifo.h"
#include "VideoCommon/FramebufferManager.h"
#include "VideoCommon/GeometryShaderManager.h"
#include "VideoCommon/PixelEngine.h"
#include "VideoCommon/PixelShaderManager.h"
#include "VideoCommon/RenderBase.h"
#include "VideoCommon/TextureCacheBase.h"
#include "VideoCommon/TextureDecoder.h"
#include "VideoCommon/VertexManagerBase.h"
#include "VideoCommon/VertexShaderManager.h"
@@ -21,6 +24,15 @@

void VideoCommon_DoState(PointerWrap& p)
{
bool software = false;
p.Do(software);

if (p.GetMode() == PointerWrap::MODE_READ && software == true)
{
// change mode to abort load of incompatible save state.
p.SetMode(PointerWrap::MODE_VERIFY);
}

// BP Memory
p.Do(bpmem);
p.DoMarker("BP Memory");
@@ -63,5 +75,19 @@ void VideoCommon_DoState(PointerWrap& p)
BoundingBox::DoState(p);
p.DoMarker("BoundingBox");

// TODO: search for more data that should be saved and add it here
g_framebuffer_manager->DoState(p);
p.DoMarker("FramebufferManager");

g_texture_cache->DoState(p);
p.DoMarker("TextureCache");

g_renderer->DoState(p);
p.DoMarker("Renderer");

// Refresh state.
if (p.GetMode() == PointerWrap::MODE_READ)
{
// Inform backend of new state from registers.
BPReload();
}
}