@@ -174,8 +174,8 @@ class CommandProcessorManager
bool IsInterruptWaiting() const;

void SetCpClearRegister();
void SetCpControlRegister();
void SetCpStatusRegister();
void SetCpControlRegister(Core::System& system);
void SetCpStatusRegister(Core::System& system);

void HandleUnknownOpcode(u8 cmd_byte, const u8* buffer, bool preprocess);

Large diffs are not rendered by default.

@@ -3,21 +3,28 @@

#pragma once

#include <atomic>
#include <cstddef>
#include <optional>

#include "Common/BlockingLoop.h"
#include "Common/CommonTypes.h"
#include "Common/Event.h"
#include "Common/Flag.h"

class PointerWrap;

// Forward declaration only: this header refers to Core::System exclusively by reference,
// so the full definition is not required here.
namespace Core
{
class System;
}
// Declarations placed in namespace CoreTiming.
// NOTE(review): the seven function declarations below share their names with FifoManager
// member functions declared later in this file (Init, Shutdown, Prepare, DoState,
// PauseAndLock, UpdateWantDeterminism, UseDeterministicGPUThread), but lack the
// Core::System& parameter. They look like leftovers of a free-function API that ended up
// in this namespace - confirm whether they are still defined anywhere before relying on them.
namespace CoreTiming
{
void Init();
void Shutdown();
void Prepare(); // Must be called from the CPU thread.
void DoState(PointerWrap& f);
void PauseAndLock(bool doLock, bool unpauseOnUnlock);
void UpdateWantDeterminism(bool want);
bool UseDeterministicGPUThread();
// Opaque forward declaration; FifoManager only stores a CoreTiming::EventType* (m_event_sync_gpu).
struct EventType;
}

namespace Fifo
{
// Used for diagnostics.
enum class SyncGPUReason
{
@@ -29,23 +36,96 @@ enum class SyncGPUReason
Swap,
AuxSpace,
};
// In deterministic GPU thread mode this waits for the GPU to be done with pending work.
void SyncGPU(SyncGPUReason reason, bool may_move_read_ptr = true);

// In single core mode, this runs the GPU for a single slice.
// In dual core mode, this synchronizes with the GPU thread.
void SyncGPUForRegisterAccess();
class FifoManager final
{
public:
FifoManager();
FifoManager(const FifoManager& other) = delete;
FifoManager(FifoManager&& other) = delete;
FifoManager& operator=(const FifoManager& other) = delete;
FifoManager& operator=(FifoManager&& other) = delete;
~FifoManager();

void Init(Core::System& system);
void Shutdown();
void Prepare(Core::System& system); // Must be called from the CPU thread.
void DoState(PointerWrap& f);
void PauseAndLock(Core::System& system, bool doLock, bool unpauseOnUnlock);
void UpdateWantDeterminism(Core::System& system, bool want);
bool UseDeterministicGPUThread() const { return m_use_deterministic_gpu_thread; }

// In deterministic GPU thread mode this waits for the GPU to be done with pending work.
void SyncGPU(SyncGPUReason reason, bool may_move_read_ptr = true);

// In single core mode, this runs the GPU for a single slice.
// In dual core mode, this synchronizes with the GPU thread.
void SyncGPUForRegisterAccess(Core::System& system);

void PushFifoAuxBuffer(const void* ptr, size_t size);
void* PopFifoAuxBuffer(size_t size);

void PushFifoAuxBuffer(const void* ptr, size_t size);
void* PopFifoAuxBuffer(size_t size);
void FlushGpu(Core::System& system);
void RunGpu(Core::System& system);
void GpuMaySleep();
void RunGpuLoop(Core::System& system);
void ExitGpuLoop(Core::System& system);
void EmulatorState(bool running);
void ResetVideoBuffer();

void FlushGpu();
void RunGpu();
void GpuMaySleep();
void RunGpuLoop();
void ExitGpuLoop();
void EmulatorState(bool running);
bool AtBreakpoint();
void ResetVideoBuffer();
private:
void RefreshConfig();
void ReadDataFromFifo(Core::System& system, u32 readPtr);
void ReadDataFromFifoOnCPU(Core::System& system, u32 readPtr);
int RunGpuOnCpu(Core::System& system, int ticks);
int WaitForGpuThread(Core::System& system, int ticks);
static void SyncGPUCallback(Core::System& system, u64 ticks, s64 cyclesLate);

static constexpr u32 FIFO_SIZE = 2 * 1024 * 1024;

Common::BlockingLoop m_gpu_mainloop;

Common::Flag m_emu_running_state;

// Most of this array is unlikely to be faulted in...
u8 m_fifo_aux_data[FIFO_SIZE]{};
u8* m_fifo_aux_write_ptr = nullptr;
u8* m_fifo_aux_read_ptr = nullptr;

// This could be in SConfig, but it depends on multiple settings
// and can change at runtime.
bool m_use_deterministic_gpu_thread = false;

CoreTiming::EventType* m_event_sync_gpu = nullptr;

// STATE_TO_SAVE
u8* m_video_buffer = nullptr;
u8* m_video_buffer_read_ptr = nullptr;
std::atomic<u8*> m_video_buffer_write_ptr = nullptr;
std::atomic<u8*> m_video_buffer_seen_ptr = nullptr;
u8* m_video_buffer_pp_read_ptr = nullptr;
// The read_ptr is always owned by the GPU thread. In normal mode, so is the
// write_ptr, despite it being atomic. In deterministic GPU thread mode,
// things get a bit more complicated:
// - The seen_ptr is written by the GPU thread, and points to what it's already
// processed as much of as possible - in the case of a partial command which
// caused it to stop, not the same as the read ptr. It's written by the GPU,
// under the lock, and updating the cond.
// - The write_ptr is written by the CPU thread after it copies data from the
// FIFO. Maybe someday it will be under the lock. For now, because RunGpuLoop
// polls, it's just atomic.
// - The pp_read_ptr is the CPU preprocessing version of the read_ptr.

std::atomic<int> m_sync_ticks = 0;
bool m_syncing_suspended = false;
Common::Event m_sync_wakeup_event;

std::optional<size_t> m_config_callback_id = std::nullopt;
bool m_config_sync_gpu = false;
int m_config_sync_gpu_max_distance = 0;
int m_config_sync_gpu_min_distance = 0;
float m_config_sync_gpu_overclock = 0.0f;
};

bool AtBreakpoint(Core::System& system);
} // namespace Fifo
@@ -151,13 +151,14 @@ class RunCallback final : public Callback
{
m_in_display_list = true;

auto& system = Core::System::GetInstance();

if constexpr (is_preprocess)
{
auto& system = Core::System::GetInstance();
auto& memory = system.GetMemory();
const u8* const start_address = memory.GetPointer(address);

Fifo::PushFifoAuxBuffer(start_address, size);
system.GetFifo().PushFifoAuxBuffer(start_address, size);

if (start_address != nullptr)
{
@@ -168,13 +169,13 @@ class RunCallback final : public Callback
{
const u8* start_address;

if (Fifo::UseDeterministicGPUThread())
auto& fifo = system.GetFifo();
if (fifo.UseDeterministicGPUThread())
{
start_address = static_cast<u8*>(Fifo::PopFifoAuxBuffer(size));
start_address = static_cast<u8*>(fifo.PopFifoAuxBuffer(size));
}
else
{
auto& system = Core::System::GetInstance();
auto& memory = system.GetMemory();
start_address = memory.GetPointer(address);
}
@@ -333,7 +333,8 @@ static void RaiseEvent(int cycles_into_future)

CoreTiming::FromThread from = CoreTiming::FromThread::NON_CPU;
s64 cycles = 0; // we don't care about timings for dual core mode.
if (!Core::System::GetInstance().IsDualCoreMode() || Fifo::UseDeterministicGPUThread())
auto& system = Core::System::GetInstance();
if (!system.IsDualCoreMode() || system.GetFifo().UseDeterministicGPUThread())
{
from = CoreTiming::FromThread::CPU;

@@ -83,7 +83,8 @@ std::string VideoBackendBase::BadShaderFilename(const char* shader_stage, int co

// Forwards to ExitGpuLoop() on the FIFO object of the instance returned by
// Core::System::GetInstance(). The system is passed along because ExitGpuLoop takes it
// as an explicit argument.
void VideoBackendBase::Video_ExitLoop()
{
  auto& system = Core::System::GetInstance();
  system.GetFifo().ExitGpuLoop(system);
}

// Run from the CPU thread (from VideoInterface.cpp)
@@ -92,7 +93,8 @@ void VideoBackendBase::Video_OutputXFB(u32 xfb_addr, u32 fb_width, u32 fb_stride
{
if (m_initialized && g_renderer && !g_ActiveConfig.bImmediateXFB)
{
Fifo::SyncGPU(Fifo::SyncGPUReason::Swap);
auto& system = Core::System::GetInstance();
system.GetFifo().SyncGPU(Fifo::SyncGPUReason::Swap);

AsyncRequests::Event e;
e.time = ticks;
@@ -147,7 +149,8 @@ u32 VideoBackendBase::Video_GetQueryResult(PerfQueryType type)
return 0;
}

Fifo::SyncGPU(Fifo::SyncGPUReason::PerfQuery);
auto& system = Core::System::GetInstance();
system.GetFifo().SyncGPU(Fifo::SyncGPUReason::PerfQuery);

AsyncRequests::Event e;
e.time = 0;
@@ -185,7 +188,8 @@ u16 VideoBackendBase::Video_GetBoundingBox(int index)
warn_once = false;
}

Fifo::SyncGPU(Fifo::SyncGPUReason::BBox);
auto& system = Core::System::GetInstance();
system.GetFifo().SyncGPU(Fifo::SyncGPUReason::BBox);

AsyncRequests::Event e;
u16 result;
@@ -291,7 +295,8 @@ void VideoBackendBase::PopulateBackendInfoFromUI()

void VideoBackendBase::DoState(PointerWrap& p)
{
if (!Core::System::GetInstance().IsDualCoreMode())
auto& system = Core::System::GetInstance();
if (!system.IsDualCoreMode())
{
VideoCommon_DoState(p);
return;
@@ -304,7 +309,7 @@ void VideoBackendBase::DoState(PointerWrap& p)

// Let the GPU thread sleep after loading the state, so we're not spinning if paused after loading
// a state. The next GP burst will wake it up again.
Fifo::GpuMaySleep();
system.GetFifo().GpuMaySleep();
}

void VideoBackendBase::InitializeShared()
@@ -319,7 +324,7 @@ void VideoBackendBase::InitializeShared()
auto& system = Core::System::GetInstance();
auto& command_processor = system.GetCommandProcessor();
command_processor.Init(system);
Fifo::Init();
system.GetFifo().Init(system);
PixelEngine::Init();
BPInit();
VertexLoaderManager::Init();
@@ -336,6 +341,7 @@ void VideoBackendBase::ShutdownShared()
{
m_initialized = false;

auto& system = Core::System::GetInstance();
VertexLoaderManager::Clear();
Fifo::Shutdown();
system.GetFifo().Shutdown();
}
@@ -60,10 +60,10 @@ void VideoCommon_DoState(PointerWrap& p)
p.DoMarker("TMEM");

// FIFO
Fifo::DoState(p);
auto& system = Core::System::GetInstance();
system.GetFifo().DoState(p);
p.DoMarker("Fifo");

auto& system = Core::System::GetInstance();
auto& command_processor = system.GetCommandProcessor();
command_processor.DoState(p);
p.DoMarker("CommandProcessor");
@@ -257,13 +257,14 @@ void LoadIndexedXF(CPArray array, u32 index, u16 address, u8 size)

u32* currData = (u32*)(&xfmem) + address;
u32* newData;
if (Fifo::UseDeterministicGPUThread())
auto& system = Core::System::GetInstance();
auto& fifo = system.GetFifo();
if (fifo.UseDeterministicGPUThread())
{
newData = (u32*)Fifo::PopFifoAuxBuffer(size * sizeof(u32));
newData = (u32*)fifo.PopFifoAuxBuffer(size * sizeof(u32));
}
else
{
auto& system = Core::System::GetInstance();
auto& memory = system.GetMemory();
newData = (u32*)memory.GetPointer(g_main_cp_state.array_bases[array] +
g_main_cp_state.array_strides[array] * index);
@@ -293,7 +294,7 @@ void PreprocessIndexedXF(CPArray array, u32 index, u16 address, u8 size)
g_preprocess_cp_state.array_strides[array] * index);

const size_t buf_size = size * sizeof(u32);
Fifo::PushFifoAuxBuffer(new_data, buf_size);
system.GetFifo().PushFifoAuxBuffer(new_data, buf_size);
}

std::pair<std::string, std::string> GetXFRegInfo(u32 address, u32 value)