Merge pull request #9710 from JosJuice/volatile-begone
Remove all remaining volatile qualifiers
lioncash committed May 20, 2021
2 parents fb43aaf + 8a0f5ea commit 8b81481
Showing 11 changed files with 231 additions and 180 deletions.
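The recurring change across these files is the same: counters and result accumulators that were previously declared `volatile` become `std::atomic`, read and written with `std::memory_order_relaxed`. Below is a minimal, self-contained sketch of that pattern; it is not Dolphin code, and the type and member names are made up for illustration.

```cpp
#include <atomic>
#include <cstdint>

// Hypothetical stand-in for the PerfQuery counters in the diffs below.
// The atomic makes concurrent increments, decrements, and reads well-defined;
// the relaxed ordering says no other memory operations need to be ordered
// around these accesses, only the counter value itself matters.
struct QueryCounterSketch
{
  std::atomic<uint32_t> count{0};

  void OnQueryIssued() { count.fetch_add(1, std::memory_order_relaxed); }
  void OnQueryRetired() { count.fetch_sub(1, std::memory_order_relaxed); }
  bool IsFlushed() const { return count.load(std::memory_order_relaxed) == 0; }
};
```

Unlike `volatile`, this makes cross-thread access defined behavior without paying for sequentially consistent operations.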
11 changes: 3 additions & 8 deletions Source/Core/Common/ChunkFile.h
@@ -32,11 +32,6 @@
#include "Common/Inline.h"
#include "Common/Logging/Log.h"

// XXX: Replace this with std::is_trivially_copyable<T> once we stop using volatile
// on things that are put in savestates, as volatile types are not trivially copyable.
template <typename T>
constexpr bool IsTriviallyCopyable = std::is_trivially_copyable<std::remove_volatile_t<T>>::value;

// Wrapper class
class PointerWrap
{
@@ -181,13 +176,13 @@ class PointerWrap
DoArray(x.data(), static_cast<u32>(x.size()));
}

template <typename T, typename std::enable_if_t<IsTriviallyCopyable<T>, int> = 0>
template <typename T, typename std::enable_if_t<std::is_trivially_copyable_v<T>, int> = 0>
void DoArray(T* x, u32 count)
{
DoVoid(x, count * sizeof(T));
}

template <typename T, typename std::enable_if_t<!IsTriviallyCopyable<T>, int> = 0>
template <typename T, typename std::enable_if_t<!std::is_trivially_copyable_v<T>, int> = 0>
void DoArray(T* x, u32 count)
{
for (u32 i = 0; i < count; ++i)
@@ -230,7 +225,7 @@ class PointerWrap
template <typename T>
void Do(T& x)
{
static_assert(IsTriviallyCopyable<T>, "Only sane for trivially copyable types");
static_assert(std::is_trivially_copyable_v<T>, "Only sane for trivially copyable types");
// Note:
// Usually we can just use x = **ptr, etc. However, this doesn't work
// for unions containing BitFields (long story, stupid language rules)
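With the last `volatile` savestate members gone, the `IsTriviallyCopyable` wrapper removed above is no longer needed, and `Do()`/`DoArray()` can use `std::is_trivially_copyable_v` directly. A quick illustration of what that trait accepts and rejects (the example types are hypothetical, not Dolphin's):

```cpp
#include <string>
#include <type_traits>

struct PodState  // plain data: safe to serialize with a raw DoVoid/memcpy
{
  int frame;
  float speed;
};

struct NonPodState  // owns a heap allocation: must be serialized member by member
{
  std::string name;
};

static_assert(std::is_trivially_copyable_v<PodState>);
static_assert(!std::is_trivially_copyable_v<NonPodState>);
```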
10 changes: 0 additions & 10 deletions Source/Core/Core/HW/MMIO.cpp
@@ -101,20 +101,10 @@ ReadHandlingMethod<T>* DirectRead(const T* addr, u32 mask)
return new DirectHandlingMethod<T>(const_cast<T*>(addr), mask);
}
template <typename T>
ReadHandlingMethod<T>* DirectRead(volatile const T* addr, u32 mask)
{
return new DirectHandlingMethod<T>((T*)addr, mask);
}
template <typename T>
WriteHandlingMethod<T>* DirectWrite(T* addr, u32 mask)
{
return new DirectHandlingMethod<T>(addr, mask);
}
template <typename T>
WriteHandlingMethod<T>* DirectWrite(volatile T* addr, u32 mask)
{
return new DirectHandlingMethod<T>((T*)addr, mask);
}

// Complex: holds a lambda that is called when a read or a write is executed.
// This gives complete control to the user as to what is going to happen during
6 changes: 0 additions & 6 deletions Source/Core/Core/HW/MMIOHandlers.h
@@ -46,11 +46,7 @@ WriteHandlingMethod<T>* Nop();
template <typename T>
ReadHandlingMethod<T>* DirectRead(const T* addr, u32 mask = 0xFFFFFFFF);
template <typename T>
ReadHandlingMethod<T>* DirectRead(volatile const T* addr, u32 mask = 0xFFFFFFFF);
template <typename T>
WriteHandlingMethod<T>* DirectWrite(T* addr, u32 mask = 0xFFFFFFFF);
template <typename T>
WriteHandlingMethod<T>* DirectWrite(volatile T* addr, u32 mask = 0xFFFFFFFF);

// Complex: use when no other handling method fits your needs. These allow you
// to directly provide a function that will be called when a read/write needs
@@ -204,9 +200,7 @@ class WriteHandler
MaybeExtern template ReadHandlingMethod<T>* Constant<T>(T value); \
MaybeExtern template WriteHandlingMethod<T>* Nop<T>(); \
MaybeExtern template ReadHandlingMethod<T>* DirectRead(const T* addr, u32 mask); \
MaybeExtern template ReadHandlingMethod<T>* DirectRead(volatile const T* addr, u32 mask); \
MaybeExtern template WriteHandlingMethod<T>* DirectWrite(T* addr, u32 mask); \
MaybeExtern template WriteHandlingMethod<T>* DirectWrite(volatile T* addr, u32 mask); \
MaybeExtern template ReadHandlingMethod<T>* ComplexRead<T>(std::function<T(u32)>); \
MaybeExtern template WriteHandlingMethod<T>* ComplexWrite<T>(std::function<void(u32, T)>); \
MaybeExtern template ReadHandlingMethod<T>* InvalidRead<T>(); \
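Removing the `volatile` overloads means MMIO register handlers can no longer point at `volatile` backing storage: callers pass plain variables to `DirectRead`/`DirectWrite`, or use the `Complex*` handlers when a callback is needed. A simplified, self-contained sketch of the two handling styles follows; this is not Dolphin's `MMIOHandlers` implementation, just the idea behind it.

```cpp
#include <cstdint>
#include <functional>
#include <iostream>

// "Direct" handling: the MMIO register simply mirrors an ordinary variable.
template <typename T>
std::function<T(uint32_t)> DirectReadSketch(const T* backing)
{
  return [backing](uint32_t) { return *backing; };
}

// "Complex" handling: an arbitrary callback computes the value per access.
template <typename T>
std::function<T(uint32_t)> ComplexReadSketch(std::function<T(uint32_t)> fn)
{
  return fn;
}

int main()
{
  uint16_t status_register = 0x1234;  // plain (non-volatile) backing variable

  auto direct = DirectReadSketch<uint16_t>(&status_register);
  auto complex = ComplexReadSketch<uint16_t>(
      [](uint32_t address) { return static_cast<uint16_t>(address & 0xFFFF); });

  std::cout << std::hex << direct(0xCC000000) << ' ' << complex(0xCC000002) << '\n';
}
```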
53 changes: 35 additions & 18 deletions Source/Core/VideoBackends/D3D/D3DPerfQuery.cpp
@@ -27,11 +27,13 @@ PerfQuery::~PerfQuery() = default;

void PerfQuery::EnableQuery(PerfQueryGroup type)
{
const u32 query_count = m_query_count.load(std::memory_order_relaxed);

// Is this sane?
if (m_query_count > m_query_buffer.size() / 2)
if (query_count > m_query_buffer.size() / 2)
WeakFlush();

if (m_query_buffer.size() == m_query_count)
if (m_query_buffer.size() == query_count)
{
// TODO
FlushOne();
@@ -41,12 +43,12 @@ void PerfQuery::EnableQuery(PerfQueryGroup type)
// start query
if (type == PQG_ZCOMP_ZCOMPLOC || type == PQG_ZCOMP)
{
auto& entry = m_query_buffer[(m_query_read_pos + m_query_count) % m_query_buffer.size()];
auto& entry = m_query_buffer[(m_query_read_pos + query_count) % m_query_buffer.size()];

D3D::context->Begin(entry.query.Get());
entry.query_type = type;

++m_query_count;
m_query_count.fetch_add(1, std::memory_order_relaxed);
}
}

@@ -55,30 +57,41 @@ void PerfQuery::DisableQuery(PerfQueryGroup type)
// stop query
if (type == PQG_ZCOMP_ZCOMPLOC || type == PQG_ZCOMP)
{
auto& entry = m_query_buffer[(m_query_read_pos + m_query_count + m_query_buffer.size() - 1) %
auto& entry = m_query_buffer[(m_query_read_pos + m_query_count.load(std::memory_order_relaxed) +
m_query_buffer.size() - 1) %
m_query_buffer.size()];
D3D::context->End(entry.query.Get());
}
}

void PerfQuery::ResetQuery()
{
m_query_count = 0;
std::fill(std::begin(m_results), std::end(m_results), 0);
m_query_count.store(0, std::memory_order_relaxed);
for (size_t i = 0; i < m_results.size(); ++i)
m_results[i].store(0, std::memory_order_relaxed);
}

u32 PerfQuery::GetQueryResult(PerfQueryType type)
{
u32 result = 0;

if (type == PQ_ZCOMP_INPUT_ZCOMPLOC || type == PQ_ZCOMP_OUTPUT_ZCOMPLOC)
result = m_results[PQG_ZCOMP_ZCOMPLOC];
{
result = m_results[PQG_ZCOMP_ZCOMPLOC].load(std::memory_order_relaxed);
}
else if (type == PQ_ZCOMP_INPUT || type == PQ_ZCOMP_OUTPUT)
result = m_results[PQG_ZCOMP];
{
result = m_results[PQG_ZCOMP].load(std::memory_order_relaxed);
}
else if (type == PQ_BLEND_INPUT)
result = m_results[PQG_ZCOMP] + m_results[PQG_ZCOMP_ZCOMPLOC];
{
result = m_results[PQG_ZCOMP].load(std::memory_order_relaxed) +
m_results[PQG_ZCOMP_ZCOMPLOC].load(std::memory_order_relaxed);
}
else if (type == PQ_EFB_COPY_CLOCKS)
result = m_results[PQG_EFB_COPY_CLOCKS];
{
result = m_results[PQG_EFB_COPY_CLOCKS].load(std::memory_order_relaxed);
}

return result;
}
@@ -98,11 +111,13 @@ void PerfQuery::FlushOne()
// NOTE: Reported pixel metrics should be referenced to native resolution
// TODO: Dropping the lower 2 bits from this count should be closer to actual
// hardware behavior when drawing triangles.
m_results[entry.query_type] += (u32)(result * EFB_WIDTH / g_renderer->GetTargetWidth() *
EFB_HEIGHT / g_renderer->GetTargetHeight());
const u64 native_res_result = result * EFB_WIDTH / g_renderer->GetTargetWidth() * EFB_HEIGHT /
g_renderer->GetTargetHeight();
m_results[entry.query_type].fetch_add(static_cast<u32>(native_res_result),
std::memory_order_relaxed);

m_query_read_pos = (m_query_read_pos + 1) % m_query_buffer.size();
--m_query_count;
m_query_count.fetch_sub(1, std::memory_order_relaxed);
}

// TODO: could selectively flush things, but I don't think that will do much
@@ -125,11 +140,13 @@ void PerfQuery::WeakFlush()
if (hr == S_OK)
{
// NOTE: Reported pixel metrics should be referenced to native resolution
m_results[entry.query_type] += (u32)(result * EFB_WIDTH / g_renderer->GetTargetWidth() *
EFB_HEIGHT / g_renderer->GetTargetHeight());
const u64 native_res_result = result * EFB_WIDTH / g_renderer->GetTargetWidth() * EFB_HEIGHT /
g_renderer->GetTargetHeight();
m_results[entry.query_type].store(static_cast<u32>(native_res_result),
std::memory_order_relaxed);

m_query_read_pos = (m_query_read_pos + 1) % m_query_buffer.size();
--m_query_count;
m_query_count.fetch_sub(1, std::memory_order_relaxed);
}
else
{
@@ -140,7 +157,7 @@ void PerfQuery::WeakFlush()

bool PerfQuery::IsFlushed() const
{
return 0 == m_query_count;
return m_query_count.load(std::memory_order_relaxed) == 0;
}

} // namespace DX11
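The scaling expression used in `FlushOne()` and `WeakFlush()` converts a sample count measured at the configured internal resolution back to native EFB pixels. A worked example, assuming the usual EFB dimensions of 640x528 and a 2x internal resolution (these numbers are illustrative, not taken from this diff):

```cpp
#include <cstdint>
#include <iostream>

int main()
{
  const uint64_t result = 1000;                     // raw samples reported by the occlusion query
  const uint64_t efb_w = 640, efb_h = 528;          // assumed native EFB size
  const uint64_t target_w = 1280, target_h = 1056;  // assumed 2x internal resolution

  // Same shape as the expression above: alternate multiply and divide in 64-bit
  // so intermediate values neither overflow nor truncate prematurely.
  const uint64_t native = result * efb_w / target_w * efb_h / target_h;
  std::cout << native << '\n';  // 1000 * 640/1280 * 528/1056 = 250
}
```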
45 changes: 29 additions & 16 deletions Source/Core/VideoBackends/D3D12/D3D12PerfQuery.cpp
@@ -52,10 +52,11 @@ void PerfQuery::EnableQuery(PerfQueryGroup type)
{
// Block if there are no free slots.
// Otherwise, try to keep half of them available.
if (m_query_count > m_query_buffer.size() / 2)
const u32 query_count = m_query_count.load(std::memory_order_relaxed);
if (query_count > m_query_buffer.size() / 2)
{
const bool do_resolve = m_unresolved_queries > m_query_buffer.size() / 2;
const bool blocking = m_query_count == PERF_QUERY_BUFFER_SIZE;
const bool blocking = query_count == PERF_QUERY_BUFFER_SIZE;
PartialFlush(do_resolve, blocking);
}

Expand Down Expand Up @@ -83,19 +84,20 @@ void PerfQuery::DisableQuery(PerfQueryGroup type)
g_dx_context->GetCommandList()->EndQuery(m_query_heap.Get(), D3D12_QUERY_TYPE_OCCLUSION,
m_query_next_pos);
m_query_next_pos = (m_query_next_pos + 1) % PERF_QUERY_BUFFER_SIZE;
m_query_count++;
m_query_count.fetch_add(1, std::memory_order_relaxed);
m_unresolved_queries++;
}
}

void PerfQuery::ResetQuery()
{
m_query_count = 0;
m_query_count.store(0, std::memory_order_relaxed);
m_unresolved_queries = 0;
m_query_resolve_pos = 0;
m_query_readback_pos = 0;
m_query_next_pos = 0;
std::fill(std::begin(m_results), std::end(m_results), 0);
for (size_t i = 0; i < m_results.size(); ++i)
m_results[i].store(0, std::memory_order_relaxed);
for (auto& entry : m_query_buffer)
{
entry.fence_value = 0;
@@ -108,13 +110,22 @@ u32 PerfQuery::GetQueryResult(PerfQueryType type)
{
u32 result = 0;
if (type == PQ_ZCOMP_INPUT_ZCOMPLOC || type == PQ_ZCOMP_OUTPUT_ZCOMPLOC)
result = m_results[PQG_ZCOMP_ZCOMPLOC];
{
result = m_results[PQG_ZCOMP_ZCOMPLOC].load(std::memory_order_relaxed);
}
else if (type == PQ_ZCOMP_INPUT || type == PQ_ZCOMP_OUTPUT)
result = m_results[PQG_ZCOMP];
{
result = m_results[PQG_ZCOMP].load(std::memory_order_relaxed);
}
else if (type == PQ_BLEND_INPUT)
result = m_results[PQG_ZCOMP] + m_results[PQG_ZCOMP_ZCOMPLOC];
{
result = m_results[PQG_ZCOMP].load(std::memory_order_relaxed) +
m_results[PQG_ZCOMP_ZCOMPLOC].load(std::memory_order_relaxed);
}
else if (type == PQ_EFB_COPY_CLOCKS)
result = m_results[PQG_EFB_COPY_CLOCKS];
{
result = m_results[PQG_EFB_COPY_CLOCKS].load(std::memory_order_relaxed);
}

return result / 4;
}
@@ -127,7 +138,7 @@ void PerfQuery::FlushResults()

bool PerfQuery::IsFlushed() const
{
return m_query_count == 0;
return m_query_count.load(std::memory_order_relaxed) == 0;
}

void PerfQuery::ResolveQueries()
Expand Down Expand Up @@ -165,7 +176,7 @@ void PerfQuery::ReadbackQueries(bool blocking)
u64 completed_fence_counter = g_dx_context->GetCompletedFenceValue();

// Need to save these since ProcessResults will modify them.
const u32 outstanding_queries = m_query_count;
const u32 outstanding_queries = m_query_count.load(std::memory_order_relaxed);
u32 readback_count = 0;
for (u32 i = 0; i < outstanding_queries; i++)
{
@@ -203,7 +214,7 @@ void PerfQuery::ReadbackQueries(bool blocking)
void PerfQuery::AccumulateQueriesFromBuffer(u32 query_count)
{
// Should be at maximum query_count queries pending.
ASSERT(query_count <= m_query_count &&
ASSERT(query_count <= m_query_count.load(std::memory_order_relaxed) &&
(m_query_readback_pos + query_count) <= PERF_QUERY_BUFFER_SIZE);

const D3D12_RANGE read_range = {m_query_readback_pos * sizeof(PerfQueryDataType),
@@ -231,16 +242,18 @@ void PerfQuery::AccumulateQueriesFromBuffer(u32 query_count)
std::memcpy(&result, mapped_ptr + (index * sizeof(PerfQueryDataType)), sizeof(result));

// NOTE: Reported pixel metrics should be referenced to native resolution
m_results[entry.query_type] +=
static_cast<u32>(static_cast<u64>(result) * EFB_WIDTH / g_renderer->GetTargetWidth() *
EFB_HEIGHT / g_renderer->GetTargetHeight());
const u64 native_res_result = static_cast<u64>(result) * EFB_WIDTH /
g_renderer->GetTargetWidth() * EFB_HEIGHT /
g_renderer->GetTargetHeight();
m_results[entry.query_type].fetch_add(static_cast<u32>(native_res_result),
std::memory_order_relaxed);
}

constexpr D3D12_RANGE write_range = {0, 0};
m_query_readback_buffer->Unmap(0, &write_range);

m_query_readback_pos = (m_query_readback_pos + query_count) % PERF_QUERY_BUFFER_SIZE;
m_query_count -= query_count;
m_query_count.fetch_sub(query_count, std::memory_order_relaxed);
}

void PerfQuery::PartialFlush(bool resolve, bool blocking)
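One small detail shared by both `ResetQuery()` rewrites: `std::fill` over the results array is replaced by an explicit loop of relaxed stores. `std::fill` would still compile once the elements are `std::atomic` (atomic's `operator=` is a store), but that store is sequentially consistent, whereas the loop keeps every access to the results explicitly relaxed. A sketch of the two forms, with an illustrative array name and size:

```cpp
#include <algorithm>
#include <array>
#include <atomic>
#include <cstdint>

std::array<std::atomic<uint32_t>, 4> results{};

void ResetWithFill()
{
  // Each element assignment is an atomic store with seq_cst ordering.
  std::fill(std::begin(results), std::end(results), 0);
}

void ResetWithRelaxedStores()
{
  // Same end state, but every store is explicitly memory_order_relaxed.
  for (auto& r : results)
    r.store(0, std::memory_order_relaxed);
}
```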
