@@ -43,7 +43,7 @@ void PerfQuery::DisableQuery(PerfQueryGroup type)

 bool PerfQuery::IsFlushed() const
 {
-  return 0 == m_query_count;
+  return m_query_count.load(std::memory_order_relaxed) == 0;
 }

 // TODO: could selectively flush things, but I don't think that will do much
@@ -54,8 +54,9 @@ void PerfQuery::FlushResults()

 void PerfQuery::ResetQuery()
 {
-  m_query_count = 0;
-  std::fill(std::begin(m_results), std::end(m_results), 0);
+  m_query_count.store(0, std::memory_order_relaxed);
+  for (size_t i = 0; i < m_results.size(); ++i)
+    m_results[i].store(0, std::memory_order_relaxed);
 }

 u32 PerfQuery::GetQueryResult(PerfQueryType type)
@@ -64,19 +65,20 @@ u32 PerfQuery::GetQueryResult(PerfQueryType type)

   if (type == PQ_ZCOMP_INPUT_ZCOMPLOC || type == PQ_ZCOMP_OUTPUT_ZCOMPLOC)
   {
-    result = m_results[PQG_ZCOMP_ZCOMPLOC];
+    result = m_results[PQG_ZCOMP_ZCOMPLOC].load(std::memory_order_relaxed);
   }
   else if (type == PQ_ZCOMP_INPUT || type == PQ_ZCOMP_OUTPUT)
   {
-    result = m_results[PQG_ZCOMP];
+    result = m_results[PQG_ZCOMP].load(std::memory_order_relaxed);
   }
   else if (type == PQ_BLEND_INPUT)
   {
-    result = m_results[PQG_ZCOMP] + m_results[PQG_ZCOMP_ZCOMPLOC];
+    result = m_results[PQG_ZCOMP].load(std::memory_order_relaxed) +
+             m_results[PQG_ZCOMP_ZCOMPLOC].load(std::memory_order_relaxed);
   }
   else if (type == PQ_EFB_COPY_CLOCKS)
   {
-    result = m_results[PQG_EFB_COPY_CLOCKS];
+    result = m_results[PQG_EFB_COPY_CLOCKS].load(std::memory_order_relaxed);
   }

   return result;
@@ -97,11 +99,13 @@ PerfQueryGL::~PerfQueryGL()

 void PerfQueryGL::EnableQuery(PerfQueryGroup type)
 {
+  const u32 query_count = m_query_count.load(std::memory_order_relaxed);
+
   // Is this sane?
-  if (m_query_count > m_query_buffer.size() / 2)
+  if (query_count > m_query_buffer.size() / 2)
     WeakFlush();

-  if (m_query_buffer.size() == m_query_count)
+  if (m_query_buffer.size() == query_count)
   {
     FlushOne();
     // ERROR_LOG_FMT(VIDEO, "Flushed query buffer early!");
@@ -110,12 +114,12 @@ void PerfQueryGL::EnableQuery(PerfQueryGroup type)
   // start query
   if (type == PQG_ZCOMP_ZCOMPLOC || type == PQG_ZCOMP)
   {
-    auto& entry = m_query_buffer[(m_query_read_pos + m_query_count) % m_query_buffer.size()];
+    auto& entry = m_query_buffer[(m_query_read_pos + query_count) % m_query_buffer.size()];

     glBeginQuery(m_query_type, entry.query_id);
     entry.query_type = type;

-    ++m_query_count;
+    m_query_count.fetch_add(1, std::memory_order_relaxed);
   }
 }
 void PerfQueryGL::DisableQuery(PerfQueryGroup type)
@@ -164,10 +168,10 @@ void PerfQueryGL::FlushOne()
   if (g_ActiveConfig.iMultisamples > 1)
     result /= g_ActiveConfig.iMultisamples;

-  m_results[entry.query_type] += result;
+  m_results[entry.query_type].fetch_add(result, std::memory_order_relaxed);

   m_query_read_pos = (m_query_read_pos + 1) % m_query_buffer.size();
-  --m_query_count;
+  m_query_count.fetch_sub(1, std::memory_order_relaxed);
 }

 // TODO: could selectively flush things, but I don't think that will do much
@@ -191,11 +195,12 @@ PerfQueryGLESNV::~PerfQueryGLESNV()

 void PerfQueryGLESNV::EnableQuery(PerfQueryGroup type)
 {
+  const u32 query_count = m_query_count.load(std::memory_order_relaxed);
   // Is this sane?
-  if (m_query_count > m_query_buffer.size() / 2)
+  if (query_count > m_query_buffer.size() / 2)
     WeakFlush();

-  if (m_query_buffer.size() == m_query_count)
+  if (m_query_buffer.size() == query_count)
   {
     FlushOne();
     // ERROR_LOG_FMT(VIDEO, "Flushed query buffer early!");
@@ -204,12 +209,12 @@ void PerfQueryGLESNV::EnableQuery(PerfQueryGroup type)
   // start query
   if (type == PQG_ZCOMP_ZCOMPLOC || type == PQG_ZCOMP)
   {
-    auto& entry = m_query_buffer[(m_query_read_pos + m_query_count) % m_query_buffer.size()];
+    auto& entry = m_query_buffer[(m_query_read_pos + query_count) % m_query_buffer.size()];

     glBeginOcclusionQueryNV(entry.query_id);
     entry.query_type = type;

-    ++m_query_count;
+    m_query_count.fetch_add(1, std::memory_order_relaxed);
   }
 }
 void PerfQueryGLESNV::DisableQuery(PerfQueryGroup type)
@@ -251,11 +256,13 @@ void PerfQueryGLESNV::FlushOne()
   // NOTE: Reported pixel metrics should be referenced to native resolution
   // TODO: Dropping the lower 2 bits from this count should be closer to actual
   // hardware behavior when drawing triangles.
-  m_results[entry.query_type] += static_cast<u64>(result) * EFB_WIDTH * EFB_HEIGHT /
-                                 (g_renderer->GetTargetWidth() * g_renderer->GetTargetHeight());
+  const u64 native_res_result = static_cast<u64>(result) * EFB_WIDTH * EFB_HEIGHT /
+                                (g_renderer->GetTargetWidth() * g_renderer->GetTargetHeight());
+  m_results[entry.query_type].fetch_add(static_cast<u32>(native_res_result),
+                                        std::memory_order_relaxed);

   m_query_read_pos = (m_query_read_pos + 1) % m_query_buffer.size();
-  --m_query_count;
+  m_query_count.fetch_sub(1, std::memory_order_relaxed);
 }

 // TODO: could selectively flush things, but I don't think that will do much
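For context on the rescaling in the hunk above (illustrative numbers, not taken from the patch): the NV occlusion result is counted at the configured internal resolution, so it is scaled back to the native 640x528 EFB before being accumulated. Assuming an internal target of exactly 1280x1056 (2x native), a raw count of 40000 samples becomes 40000 * 640 * 528 / (1280 * 1056) = 10000 native-resolution pixels, which is then truncated to u32 for the relaxed fetch_add.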
@@ -43,8 +43,9 @@ void PerfQuery::EnableQuery(PerfQueryGroup type)
 {
   // Block if there are no free slots.
   // Otherwise, try to keep half of them available.
-  if (m_query_count > m_query_buffer.size() / 2)
-    PartialFlush(m_query_count == PERF_QUERY_BUFFER_SIZE);
+  const u32 query_count = m_query_count.load(std::memory_order_relaxed);
+  if (query_count > m_query_buffer.size() / 2)
+    PartialFlush(query_count == PERF_QUERY_BUFFER_SIZE);

   // Ensure command buffer is ready to go before beginning the query, that way we don't submit
   // a buffer with open queries.
@@ -73,16 +74,17 @@ void PerfQuery::DisableQuery(PerfQueryGroup type)
   {
     vkCmdEndQuery(g_command_buffer_mgr->GetCurrentCommandBuffer(), m_query_pool, m_query_next_pos);
     m_query_next_pos = (m_query_next_pos + 1) % PERF_QUERY_BUFFER_SIZE;
-    m_query_count++;
+    m_query_count.fetch_add(1, std::memory_order_relaxed);
   }
 }

 void PerfQuery::ResetQuery()
 {
-  m_query_count = 0;
+  m_query_count.store(0, std::memory_order_relaxed);
   m_query_readback_pos = 0;
   m_query_next_pos = 0;
-  std::fill(std::begin(m_results), std::end(m_results), 0);
+  for (size_t i = 0; i < m_results.size(); ++i)
+    m_results[i].store(0, std::memory_order_relaxed);

   // Reset entire query pool, ensuring all queries are ready to write to.
   StateTracker::GetInstance()->EndRenderPass();
@@ -96,13 +98,22 @@ u32 PerfQuery::GetQueryResult(PerfQueryType type)
 {
   u32 result = 0;
   if (type == PQ_ZCOMP_INPUT_ZCOMPLOC || type == PQ_ZCOMP_OUTPUT_ZCOMPLOC)
-    result = m_results[PQG_ZCOMP_ZCOMPLOC];
+  {
+    result = m_results[PQG_ZCOMP_ZCOMPLOC].load(std::memory_order_relaxed);
+  }
   else if (type == PQ_ZCOMP_INPUT || type == PQ_ZCOMP_OUTPUT)
-    result = m_results[PQG_ZCOMP];
+  {
+    result = m_results[PQG_ZCOMP].load(std::memory_order_relaxed);
+  }
   else if (type == PQ_BLEND_INPUT)
-    result = m_results[PQG_ZCOMP] + m_results[PQG_ZCOMP_ZCOMPLOC];
+  {
+    result = m_results[PQG_ZCOMP].load(std::memory_order_relaxed) +
+             m_results[PQG_ZCOMP_ZCOMPLOC].load(std::memory_order_relaxed);
+  }
   else if (type == PQ_EFB_COPY_CLOCKS)
-    result = m_results[PQG_EFB_COPY_CLOCKS];
+  {
+    result = m_results[PQG_EFB_COPY_CLOCKS].load(std::memory_order_relaxed);
+  }

   return result / 4;
 }
@@ -115,7 +126,7 @@ void PerfQuery::FlushResults()

 bool PerfQuery::IsFlushed() const
 {
-  return m_query_count == 0;
+  return m_query_count.load(std::memory_order_relaxed) == 0;
 }

 bool PerfQuery::CreateQueryPool()
@@ -144,7 +155,7 @@ void PerfQuery::ReadbackQueries()
   const u64 completed_fence_counter = g_command_buffer_mgr->GetCompletedFenceCounter();

   // Need to save these since ProcessResults will modify them.
-  const u32 outstanding_queries = m_query_count;
+  const u32 outstanding_queries = m_query_count.load(std::memory_order_relaxed);
   u32 readback_count = 0;
   for (u32 i = 0; i < outstanding_queries; i++)
   {
@@ -171,7 +182,7 @@ void PerfQuery::ReadbackQueries()
 void PerfQuery::ReadbackQueries(u32 query_count)
 {
   // Should be at maximum query_count queries pending.
-  ASSERT(query_count <= m_query_count &&
+  ASSERT(query_count <= m_query_count.load(std::memory_order_relaxed) &&
          (m_query_readback_pos + query_count) <= PERF_QUERY_BUFFER_SIZE);

   // Read back from the GPU.
@@ -194,13 +205,15 @@ void PerfQuery::ReadbackQueries(u32 query_count)
     entry.has_value = false;

     // NOTE: Reported pixel metrics should be referenced to native resolution
-    m_results[entry.query_type] +=
-        static_cast<u32>(static_cast<u64>(m_query_result_buffer[i]) * EFB_WIDTH /
-                         g_renderer->GetTargetWidth() * EFB_HEIGHT / g_renderer->GetTargetHeight());
+    const u64 native_res_result = static_cast<u64>(m_query_result_buffer[i]) * EFB_WIDTH /
+                                  g_renderer->GetTargetWidth() * EFB_HEIGHT /
+                                  g_renderer->GetTargetHeight();
+    m_results[entry.query_type].fetch_add(static_cast<u32>(native_res_result),
+                                          std::memory_order_relaxed);
   }

   m_query_readback_pos = (m_query_readback_pos + query_count) % PERF_QUERY_BUFFER_SIZE;
-  m_query_count -= query_count;
+  m_query_count.fetch_sub(query_count, std::memory_order_relaxed);
 }

 void PerfQuery::PartialFlush(bool blocking)
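The memory-ordering choice in these PerfQuery hunks follows one pattern: m_query_count is bumped with relaxed fetch_add/fetch_sub by the code that issues and reads back queries, and polled with relaxed loads by IsFlushed()/GetQueryResult(). A minimal, self-contained sketch of that pattern (not Dolphin code; the loop bound and thread layout are invented for illustration):

// Sketch only -- mirrors how m_query_count is used: one thread does relaxed
// fetch_add, another does relaxed loads.
#include <atomic>
#include <cstdio>
#include <thread>

int main()
{
  std::atomic<unsigned> query_count{0};

  std::thread writer([&] {
    for (int i = 0; i < 100000; ++i)
      query_count.fetch_add(1, std::memory_order_relaxed);  // like EnableQuery()
  });

  // Polling reader, like IsFlushed(): a relaxed load may be slightly stale,
  // but it always returns a value that was actually stored -- never a torn read.
  while (query_count.load(std::memory_order_relaxed) < 100000)
  {
  }

  writer.join();
  std::printf("count = %u\n", query_count.load(std::memory_order_relaxed));
}

Relaxed operations guarantee the counter is updated and read atomically and that stores eventually become visible to the polling thread, but they impose no ordering on surrounding data; that appears to be the assumption these hunks rely on, since every other field they share across threads is itself a std::atomic.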
@@ -119,11 +119,11 @@ void Init()
   m_tokenReg = 0;

   memset(&fifo, 0, sizeof(fifo));
-  fifo.bFF_Breakpoint = 0;
-  fifo.bFF_HiWatermark = 0;
-  fifo.bFF_HiWatermarkInt = 0;
-  fifo.bFF_LoWatermark = 0;
-  fifo.bFF_LoWatermarkInt = 0;
+  fifo.bFF_Breakpoint.store(0, std::memory_order_relaxed);
+  fifo.bFF_HiWatermark.store(0, std::memory_order_relaxed);
+  fifo.bFF_HiWatermarkInt.store(0, std::memory_order_relaxed);
+  fifo.bFF_LoWatermark.store(0, std::memory_order_relaxed);
+  fifo.bFF_LoWatermarkInt.store(0, std::memory_order_relaxed);

   s_interrupt_set.Clear();
   s_interrupt_waiting.Clear();
@@ -368,7 +368,7 @@ void GatherPipeBursted()
   }

   // If the game is running close to overflowing, make the exception checking more frequent.
-  if (fifo.bFF_HiWatermark)
+  if (fifo.bFF_HiWatermark.load(std::memory_order_relaxed) != 0)
     CoreTiming::ForceExceptionCheck(0);

   fifo.CPReadWriteDistance.fetch_add(GATHER_PIPE_SIZE, std::memory_order_seq_cst);
@@ -427,47 +427,53 @@ bool IsInterruptWaiting()
 void SetCPStatusFromGPU()
 {
   // breakpoint
-  if (fifo.bFF_BPEnable)
+  const bool breakpoint = fifo.bFF_Breakpoint.load(std::memory_order_relaxed);
+  if (fifo.bFF_BPEnable.load(std::memory_order_relaxed) != 0)
   {
     if (fifo.CPBreakpoint.load(std::memory_order_relaxed) ==
         fifo.CPReadPointer.load(std::memory_order_relaxed))
     {
-      if (!fifo.bFF_Breakpoint)
+      if (!breakpoint)
       {
         DEBUG_LOG_FMT(COMMANDPROCESSOR, "Hit breakpoint at {}",
                       fifo.CPReadPointer.load(std::memory_order_relaxed));
-        fifo.bFF_Breakpoint = true;
+        fifo.bFF_Breakpoint.store(1, std::memory_order_relaxed);
       }
     }
     else
     {
-      if (fifo.bFF_Breakpoint)
+      if (breakpoint)
       {
         DEBUG_LOG_FMT(COMMANDPROCESSOR, "Cleared breakpoint at {}",
                       fifo.CPReadPointer.load(std::memory_order_relaxed));
+        fifo.bFF_Breakpoint.store(0, std::memory_order_relaxed);
       }
-      fifo.bFF_Breakpoint = false;
     }
   }
   else
   {
-    if (fifo.bFF_Breakpoint)
+    if (breakpoint)
     {
       DEBUG_LOG_FMT(COMMANDPROCESSOR, "Cleared breakpoint at {}",
                     fifo.CPReadPointer.load(std::memory_order_relaxed));
+      fifo.bFF_Breakpoint.store(0, std::memory_order_relaxed);
     }
-    fifo.bFF_Breakpoint = false;
   }

   // overflow & underflow check
-  fifo.bFF_HiWatermark =
-      (fifo.CPReadWriteDistance.load(std::memory_order_relaxed) > fifo.CPHiWatermark);
-  fifo.bFF_LoWatermark =
-      (fifo.CPReadWriteDistance.load(std::memory_order_relaxed) < fifo.CPLoWatermark);
-
-  bool bpInt = fifo.bFF_Breakpoint && fifo.bFF_BPInt;
-  bool ovfInt = fifo.bFF_HiWatermark && fifo.bFF_HiWatermarkInt;
-  bool undfInt = fifo.bFF_LoWatermark && fifo.bFF_LoWatermarkInt;
+  fifo.bFF_HiWatermark.store(
+      (fifo.CPReadWriteDistance.load(std::memory_order_relaxed) > fifo.CPHiWatermark),
+      std::memory_order_relaxed);
+  fifo.bFF_LoWatermark.store(
+      (fifo.CPReadWriteDistance.load(std::memory_order_relaxed) < fifo.CPLoWatermark),
+      std::memory_order_relaxed);
+
+  bool bpInt = fifo.bFF_Breakpoint.load(std::memory_order_relaxed) &&
+               fifo.bFF_BPInt.load(std::memory_order_relaxed);
+  bool ovfInt = fifo.bFF_HiWatermark.load(std::memory_order_relaxed) &&
+                fifo.bFF_HiWatermarkInt.load(std::memory_order_relaxed);
+  bool undfInt = fifo.bFF_LoWatermark.load(std::memory_order_relaxed) &&
+                 fifo.bFF_LoWatermarkInt.load(std::memory_order_relaxed);

   bool interrupt = (bpInt || ovfInt || undfInt) && m_CPCtrlReg.GPReadEnable;

@@ -493,14 +499,19 @@ void SetCPStatusFromGPU()
 void SetCPStatusFromCPU()
 {
   // overflow & underflow check
-  fifo.bFF_HiWatermark =
-      (fifo.CPReadWriteDistance.load(std::memory_order_relaxed) > fifo.CPHiWatermark);
-  fifo.bFF_LoWatermark =
-      (fifo.CPReadWriteDistance.load(std::memory_order_relaxed) < fifo.CPLoWatermark);
-
-  bool bpInt = fifo.bFF_Breakpoint && fifo.bFF_BPInt;
-  bool ovfInt = fifo.bFF_HiWatermark && fifo.bFF_HiWatermarkInt;
-  bool undfInt = fifo.bFF_LoWatermark && fifo.bFF_LoWatermarkInt;
+  fifo.bFF_HiWatermark.store(
+      (fifo.CPReadWriteDistance.load(std::memory_order_relaxed) > fifo.CPHiWatermark),
+      std::memory_order_relaxed);
+  fifo.bFF_LoWatermark.store(
+      (fifo.CPReadWriteDistance.load(std::memory_order_relaxed) < fifo.CPLoWatermark),
+      std::memory_order_relaxed);
+
+  bool bpInt = fifo.bFF_Breakpoint.load(std::memory_order_relaxed) &&
+               fifo.bFF_BPInt.load(std::memory_order_relaxed);
+  bool ovfInt = fifo.bFF_HiWatermark.load(std::memory_order_relaxed) &&
+                fifo.bFF_HiWatermarkInt.load(std::memory_order_relaxed);
+  bool undfInt = fifo.bFF_LoWatermark.load(std::memory_order_relaxed) &&
+                 fifo.bFF_LoWatermarkInt.load(std::memory_order_relaxed);

   bool interrupt = (bpInt || ovfInt || undfInt) && m_CPCtrlReg.GPReadEnable;

@@ -526,14 +537,15 @@ void SetCPStatusFromCPU()
 void SetCpStatusRegister()
 {
   // Here always there is one fifo attached to the GPU
-  m_CPStatusReg.Breakpoint = fifo.bFF_Breakpoint;
+  m_CPStatusReg.Breakpoint = fifo.bFF_Breakpoint.load(std::memory_order_relaxed);
   m_CPStatusReg.ReadIdle = !fifo.CPReadWriteDistance.load(std::memory_order_relaxed) ||
                            (fifo.CPReadPointer.load(std::memory_order_relaxed) ==
                             fifo.CPWritePointer.load(std::memory_order_relaxed));
   m_CPStatusReg.CommandIdle = !fifo.CPReadWriteDistance.load(std::memory_order_relaxed) ||
-                              Fifo::AtBreakpoint() || !fifo.bFF_GPReadEnable;
-  m_CPStatusReg.UnderflowLoWatermark = fifo.bFF_LoWatermark;
-  m_CPStatusReg.OverflowHiWatermark = fifo.bFF_HiWatermark;
+                              Fifo::AtBreakpoint() ||
+                              !fifo.bFF_GPReadEnable.load(std::memory_order_relaxed);
+  m_CPStatusReg.UnderflowLoWatermark = fifo.bFF_LoWatermark.load(std::memory_order_relaxed);
+  m_CPStatusReg.OverflowHiWatermark = fifo.bFF_HiWatermark.load(std::memory_order_relaxed);

   DEBUG_LOG_FMT(COMMANDPROCESSOR, "\t Read from STATUS_REGISTER : {:04x}", m_CPStatusReg.Hex);
   DEBUG_LOG_FMT(
@@ -545,15 +557,15 @@ void SetCpStatusRegister()

 void SetCpControlRegister()
 {
-  fifo.bFF_BPInt = m_CPCtrlReg.BPInt;
-  fifo.bFF_BPEnable = m_CPCtrlReg.BPEnable;
-  fifo.bFF_HiWatermarkInt = m_CPCtrlReg.FifoOverflowIntEnable;
-  fifo.bFF_LoWatermarkInt = m_CPCtrlReg.FifoUnderflowIntEnable;
-  fifo.bFF_GPLinkEnable = m_CPCtrlReg.GPLinkEnable;
+  fifo.bFF_BPInt.store(m_CPCtrlReg.BPInt, std::memory_order_relaxed);
+  fifo.bFF_BPEnable.store(m_CPCtrlReg.BPEnable, std::memory_order_relaxed);
+  fifo.bFF_HiWatermarkInt.store(m_CPCtrlReg.FifoOverflowIntEnable, std::memory_order_relaxed);
+  fifo.bFF_LoWatermarkInt.store(m_CPCtrlReg.FifoUnderflowIntEnable, std::memory_order_relaxed);
+  fifo.bFF_GPLinkEnable.store(m_CPCtrlReg.GPLinkEnable, std::memory_order_relaxed);

-  if (fifo.bFF_GPReadEnable && !m_CPCtrlReg.GPReadEnable)
+  if (fifo.bFF_GPReadEnable.load(std::memory_order_relaxed) && !m_CPCtrlReg.GPReadEnable)
   {
-    fifo.bFF_GPReadEnable = m_CPCtrlReg.GPReadEnable;
+    fifo.bFF_GPReadEnable.store(m_CPCtrlReg.GPReadEnable, std::memory_order_relaxed);
     Fifo::FlushGpu();
   }
   else
@@ -562,8 +574,10 @@ void SetCpControlRegister()
   }

   DEBUG_LOG_FMT(COMMANDPROCESSOR, "\t GPREAD {} | BP {} | Int {} | OvF {} | UndF {} | LINK {}",
-                fifo.bFF_GPReadEnable ? "ON" : "OFF", fifo.bFF_BPEnable ? "ON" : "OFF",
-                fifo.bFF_BPInt ? "ON" : "OFF", m_CPCtrlReg.FifoOverflowIntEnable ? "ON" : "OFF",
+                fifo.bFF_GPReadEnable.load(std::memory_order_relaxed) ? "ON" : "OFF",
+                fifo.bFF_BPEnable.load(std::memory_order_relaxed) ? "ON" : "OFF",
+                fifo.bFF_BPInt.load(std::memory_order_relaxed) ? "ON" : "OFF",
+                m_CPCtrlReg.FifoOverflowIntEnable ? "ON" : "OFF",
                 m_CPCtrlReg.FifoUnderflowIntEnable ? "ON" : "OFF",
                 m_CPCtrlReg.GPLinkEnable ? "ON" : "OFF");
 }
@@ -588,32 +602,35 @@ void HandleUnknownOpcode(u8 cmd_byte, void* buffer, bool preprocess)
                  cmd_byte, buffer, preprocess ? "preprocess=true" : "preprocess=false");

   {
-    PanicAlertFmt(
-        "Illegal command {:02x}\n"
-        "CPBase: {:#010x}\n"
-        "CPEnd: {:#010x}\n"
-        "CPHiWatermark: {:#010x}\n"
-        "CPLoWatermark: {:#010x}\n"
-        "CPReadWriteDistance: {:#010x}\n"
-        "CPWritePointer: {:#010x}\n"
-        "CPReadPointer: {:#010x}\n"
-        "CPBreakpoint: {:#010x}\n"
-        "bFF_GPReadEnable: {}\n"
-        "bFF_BPEnable: {}\n"
-        "bFF_BPInt: {}\n"
-        "bFF_Breakpoint: {}\n"
-        "bFF_GPLinkEnable: {}\n"
-        "bFF_HiWatermarkInt: {}\n"
-        "bFF_LoWatermarkInt: {}\n",
-        cmd_byte, fifo.CPBase.load(std::memory_order_relaxed),
-        fifo.CPEnd.load(std::memory_order_relaxed), fifo.CPHiWatermark, fifo.CPLoWatermark,
-        fifo.CPReadWriteDistance.load(std::memory_order_relaxed),
-        fifo.CPWritePointer.load(std::memory_order_relaxed),
-        fifo.CPReadPointer.load(std::memory_order_relaxed),
-        fifo.CPBreakpoint.load(std::memory_order_relaxed), fifo.bFF_GPReadEnable ? "true" : "false",
-        fifo.bFF_BPEnable ? "true" : "false", fifo.bFF_BPInt ? "true" : "false",
-        fifo.bFF_Breakpoint ? "true" : "false", fifo.bFF_GPLinkEnable ? "true" : "false",
-        fifo.bFF_HiWatermarkInt ? "true" : "false", fifo.bFF_LoWatermarkInt ? "true" : "false");
+    PanicAlertFmt("Illegal command {:02x}\n"
+                  "CPBase: {:#010x}\n"
+                  "CPEnd: {:#010x}\n"
+                  "CPHiWatermark: {:#010x}\n"
+                  "CPLoWatermark: {:#010x}\n"
+                  "CPReadWriteDistance: {:#010x}\n"
+                  "CPWritePointer: {:#010x}\n"
+                  "CPReadPointer: {:#010x}\n"
+                  "CPBreakpoint: {:#010x}\n"
+                  "bFF_GPReadEnable: {}\n"
+                  "bFF_BPEnable: {}\n"
+                  "bFF_BPInt: {}\n"
+                  "bFF_Breakpoint: {}\n"
+                  "bFF_GPLinkEnable: {}\n"
+                  "bFF_HiWatermarkInt: {}\n"
+                  "bFF_LoWatermarkInt: {}\n",
+                  cmd_byte, fifo.CPBase.load(std::memory_order_relaxed),
+                  fifo.CPEnd.load(std::memory_order_relaxed), fifo.CPHiWatermark,
+                  fifo.CPLoWatermark, fifo.CPReadWriteDistance.load(std::memory_order_relaxed),
+                  fifo.CPWritePointer.load(std::memory_order_relaxed),
+                  fifo.CPReadPointer.load(std::memory_order_relaxed),
+                  fifo.CPBreakpoint.load(std::memory_order_relaxed),
+                  fifo.bFF_GPReadEnable.load(std::memory_order_relaxed) ? "true" : "false",
+                  fifo.bFF_BPEnable.load(std::memory_order_relaxed) ? "true" : "false",
+                  fifo.bFF_BPInt.load(std::memory_order_relaxed) ? "true" : "false",
+                  fifo.bFF_Breakpoint.load(std::memory_order_relaxed) ? "true" : "false",
+                  fifo.bFF_GPLinkEnable.load(std::memory_order_relaxed) ? "true" : "false",
+                  fifo.bFF_HiWatermarkInt.load(std::memory_order_relaxed) ? "true" : "false",
+                  fifo.bFF_LoWatermarkInt.load(std::memory_order_relaxed) ? "true" : "false");
   }
 }

@@ -29,17 +29,17 @@ struct SCPFifoStruct
   std::atomic<u32> CPBreakpoint;
   std::atomic<u32> SafeCPReadPointer;

-  volatile u32 bFF_GPLinkEnable;
-  volatile u32 bFF_GPReadEnable;
-  volatile u32 bFF_BPEnable;
-  volatile u32 bFF_BPInt;
-  volatile u32 bFF_Breakpoint;
+  std::atomic<u32> bFF_GPLinkEnable;
+  std::atomic<u32> bFF_GPReadEnable;
+  std::atomic<u32> bFF_BPEnable;
+  std::atomic<u32> bFF_BPInt;
+  std::atomic<u32> bFF_Breakpoint;

-  volatile u32 bFF_LoWatermarkInt;
-  volatile u32 bFF_HiWatermarkInt;
+  std::atomic<u32> bFF_LoWatermarkInt;
+  std::atomic<u32> bFF_HiWatermarkInt;

-  volatile u32 bFF_LoWatermark;
-  volatile u32 bFF_HiWatermark;
+  std::atomic<u32> bFF_LoWatermark;
+  std::atomic<u32> bFF_HiWatermark;

   void DoState(PointerWrap& p);
 };
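One detail worth noting about the header change above: once the bFF_* fields are std::atomic<u32>, the old plain reads and assignments would still compile, because std::atomic provides an implicit conversion and an operator=. They simply default to memory_order_seq_cst, which is stronger (and on weakly ordered CPUs costlier) than these flags need, and it hides the fact that an atomic access happens at all; presumably that is why the .cpp hunks spell out every access as an explicit load/store with std::memory_order_relaxed. A minimal sketch of the difference (illustrative only; bFF_example is a made-up stand-in, not a real field):

#include <atomic>

std::atomic<unsigned> bFF_example{0};  // stand-in for one of the bFF_* flags

void implicit_seq_cst()
{
  bFF_example = 1;           // same as bFF_example.store(1, std::memory_order_seq_cst)
  unsigned v = bFF_example;  // same as bFF_example.load(std::memory_order_seq_cst)
  (void)v;
}

void explicit_relaxed()
{
  bFF_example.store(1, std::memory_order_relaxed);  // atomic, no implied ordering
  unsigned v = bFF_example.load(std::memory_order_relaxed);
  (void)v;
}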
@@ -139,7 +139,7 @@ void Shutdown()
 void ExitGpuLoop()
 {
   // This should break the wait loop in CPU thread
-  CommandProcessor::fifo.bFF_GPReadEnable = false;
+  CommandProcessor::fifo.bFF_GPReadEnable.store(0, std::memory_order_relaxed);
   FlushGpu();

   // Terminate GPU thread loop
@@ -327,7 +327,8 @@ void RunGpuLoop()
       CommandProcessor::SetCPStatusFromGPU();

       // check if we are able to run this buffer
-      while (!CommandProcessor::IsInterruptWaiting() && fifo.bFF_GPReadEnable &&
+      while (!CommandProcessor::IsInterruptWaiting() &&
+             fifo.bFF_GPReadEnable.load(std::memory_order_relaxed) &&
              fifo.CPReadWriteDistance.load(std::memory_order_relaxed) && !AtBreakpoint())
       {
         if (param.bSyncGPU && s_sync_ticks.load() < param.iSyncGpuMinDistance)
@@ -415,8 +416,9 @@ void GpuMaySleep()
 bool AtBreakpoint()
 {
   CommandProcessor::SCPFifoStruct& fifo = CommandProcessor::fifo;
-  return fifo.bFF_BPEnable && (fifo.CPReadPointer.load(std::memory_order_relaxed) ==
-                               fifo.CPBreakpoint.load(std::memory_order_relaxed));
+  return fifo.bFF_BPEnable.load(std::memory_order_relaxed) &&
+         (fifo.CPReadPointer.load(std::memory_order_relaxed) ==
+          fifo.CPBreakpoint.load(std::memory_order_relaxed));
 }

 void RunGpu()
@@ -446,8 +448,9 @@ static int RunGpuOnCpu(int ticks)
   CommandProcessor::SCPFifoStruct& fifo = CommandProcessor::fifo;
   bool reset_simd_state = false;
   int available_ticks = int(ticks * SConfig::GetInstance().fSyncGpuOverclock) + s_sync_ticks.load();
-  while (fifo.bFF_GPReadEnable && fifo.CPReadWriteDistance.load(std::memory_order_relaxed) &&
-         !AtBreakpoint() && available_ticks >= 0)
+  while (fifo.bFF_GPReadEnable.load(std::memory_order_relaxed) &&
+         fifo.CPReadWriteDistance.load(std::memory_order_relaxed) && !AtBreakpoint() &&
+         available_ticks >= 0)
   {
     if (s_use_deterministic_gpu_thread)
     {
@@ -4,7 +4,10 @@

 #pragma once

+#include <array>
+#include <atomic>
 #include <memory>
+
 #include "Common/CommonTypes.h"

 enum PerfQueryType
@@ -61,9 +64,8 @@ class PerfQueryBase
   virtual bool IsFlushed() const { return true; }

 protected:
-  // TODO: sloppy
-  volatile u32 m_query_count;
-  volatile u32 m_results[PQG_NUM_MEMBERS];
+  std::atomic<u32> m_query_count;
+  std::array<std::atomic<u32>, PQG_NUM_MEMBERS> m_results;
 };

 extern std::unique_ptr<PerfQueryBase> g_perf_query;
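Finally, on the m_results change just above: a std::array of std::atomic<u32> is not copyable, but it can still be cleared with std::fill, since fill only assigns through each element's operator= (a seq_cst store). The patched ResetQuery() implementations use an explicit loop instead so the stores can stay relaxed, consistent with the rest of the change. A small sketch of that trade-off, with kNumGroups as a placeholder for PQG_NUM_MEMBERS:

#include <array>
#include <atomic>
#include <cstddef>

using u32 = unsigned int;
constexpr std::size_t kNumGroups = 4;  // placeholder for PQG_NUM_MEMBERS

std::array<std::atomic<u32>, kNumGroups> results{};

void ResetResults()
{
  // std::fill(results.begin(), results.end(), 0) would compile and work, but each
  // element assignment would be a seq_cst store. The loop keeps the stores relaxed,
  // matching the patched ResetQuery() implementations.
  for (auto& r : results)
    r.store(0, std::memory_order_relaxed);
}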