@@ -8,24 +8,81 @@

namespace DX12
{
BoundingBox::BoundingBox() = default;

BoundingBox::~BoundingBox()
D3D12BoundingBox::~D3D12BoundingBox()
{
if (m_gpu_descriptor)
g_dx_context->GetDescriptorHeapManager().Free(m_gpu_descriptor);
}

std::unique_ptr<BoundingBox> BoundingBox::Create()
// Creates the bounding box buffers and, when that succeeds, passes the CPU handle of
// m_gpu_descriptor to the renderer through SetPixelShaderUAV().
// Returns false when CreateBuffers() fails, true otherwise.
bool D3D12BoundingBox::Initialize()
{
  const bool buffers_ready = CreateBuffers();
  if (buffers_ready)
    Renderer::GetInstance()->SetPixelShaderUAV(m_gpu_descriptor.cpu_handle);

  return buffers_ready;
}

// Copies the whole GPU bounding box buffer into the readback buffer, executes the command list,
// and returns `length` values starting at element `index`.
// If mapping the readback buffer fails, the returned vector still has `length` entries, all
// value-initialized.
std::vector<BBoxType> D3D12BoundingBox::Read(u32 index, u32 length)
{
  // Copy from GPU->CPU buffer, and wait for the GPU to finish the copy.
  auto* const cmdlist = g_dx_context->GetCommandList();
  ID3D12Resource* const gpu_buffer = m_gpu_buffer.Get();
  ResourceBarrier(cmdlist, gpu_buffer, D3D12_RESOURCE_STATE_UNORDERED_ACCESS,
                  D3D12_RESOURCE_STATE_COPY_SOURCE);
  cmdlist->CopyBufferRegion(m_readback_buffer.Get(), 0, gpu_buffer, 0, BUFFER_SIZE);
  ResourceBarrier(cmdlist, gpu_buffer, D3D12_RESOURCE_STATE_COPY_SOURCE,
                  D3D12_RESOURCE_STATE_UNORDERED_ACCESS);
  Renderer::GetInstance()->ExecuteCommandList(true);

  // The result is sized up front so that the failure path returns the same shape.
  std::vector<BBoxType> result(length);

  static constexpr D3D12_RANGE read_range = {0, BUFFER_SIZE};
  void* mapped = nullptr;
  const HRESULT hr = m_readback_buffer->Map(0, &read_range, &mapped);
  CHECK(SUCCEEDED(hr), "Map bounding box CPU buffer");
  if (SUCCEEDED(hr))
  {
    // Copy out only the requested slice of the mapped buffer.
    const u8* const slice_begin = static_cast<const u8*>(mapped) + sizeof(BBoxType) * index;
    std::memcpy(result.data(), slice_begin, sizeof(BBoxType) * length);

    // Nothing was written through the mapping, hence the empty written range.
    static constexpr D3D12_RANGE write_range = {0, 0};
    m_readback_buffer->Unmap(0, &write_range);
  }

  return result;
}

void D3D12BoundingBox::Write(u32 index, const std::vector<BBoxType>& values)
{
auto bbox = std::unique_ptr<BoundingBox>(new BoundingBox());
if (!bbox->CreateBuffers())
return nullptr;
const u32 copy_size = static_cast<u32>(values.size()) * sizeof(BBoxType);
if (!m_upload_buffer.ReserveMemory(copy_size, sizeof(BBoxType)))
{
WARN_LOG_FMT(VIDEO, "Executing command list while waiting for space in bbox stream buffer");
Renderer::GetInstance()->ExecuteCommandList(false);
if (!m_upload_buffer.ReserveMemory(copy_size, sizeof(BBoxType)))
{
PanicAlertFmt("Failed to allocate bbox stream buffer space");
return;
}
}

const u32 upload_buffer_offset = m_upload_buffer.GetCurrentOffset();
std::memcpy(m_upload_buffer.GetCurrentHostPointer(), values.data(), copy_size);
m_upload_buffer.CommitMemory(copy_size);

ResourceBarrier(g_dx_context->GetCommandList(), m_gpu_buffer.Get(),
D3D12_RESOURCE_STATE_UNORDERED_ACCESS, D3D12_RESOURCE_STATE_COPY_DEST);

g_dx_context->GetCommandList()->CopyBufferRegion(m_gpu_buffer.Get(), index * sizeof(BBoxType),
m_upload_buffer.GetBuffer(),
upload_buffer_offset, copy_size);

return bbox;
ResourceBarrier(g_dx_context->GetCommandList(), m_gpu_buffer.Get(),
D3D12_RESOURCE_STATE_COPY_DEST, D3D12_RESOURCE_STATE_UNORDERED_ACCESS);
}

bool BoundingBox::CreateBuffers()
bool D3D12BoundingBox::CreateBuffers()
{
static constexpr D3D12_HEAP_PROPERTIES gpu_heap_properties = {D3D12_HEAP_TYPE_DEFAULT};
static constexpr D3D12_HEAP_PROPERTIES cpu_heap_properties = {D3D12_HEAP_TYPE_READBACK};
@@ -48,7 +105,7 @@ bool BoundingBox::CreateBuffers()
return false;

D3D12_UNORDERED_ACCESS_VIEW_DESC uav_desc = {DXGI_FORMAT_R32_SINT, D3D12_UAV_DIMENSION_BUFFER};
uav_desc.Buffer.NumElements = NUM_VALUES;
uav_desc.Buffer.NumElements = NUM_BBOX_VALUES;
g_dx_context->GetDevice()->CreateUnorderedAccessView(m_gpu_buffer.Get(), nullptr, &uav_desc,
m_gpu_descriptor.cpu_handle);

@@ -63,120 +120,6 @@ bool BoundingBox::CreateBuffers()
if (!m_upload_buffer.AllocateBuffer(STREAM_BUFFER_SIZE))
return false;

// The contents of both the CPU and GPU buffers are unknown, so force a flush the first time.
m_values.fill(0);
m_dirty.fill(true);
m_valid = true;
return true;
}

void BoundingBox::Readback()
{
// Copy from GPU->CPU buffer, and wait for the GPU to finish the copy.
ResourceBarrier(g_dx_context->GetCommandList(), m_gpu_buffer.Get(),
D3D12_RESOURCE_STATE_UNORDERED_ACCESS, D3D12_RESOURCE_STATE_COPY_SOURCE);
g_dx_context->GetCommandList()->CopyBufferRegion(m_readback_buffer.Get(), 0, m_gpu_buffer.Get(),
0, BUFFER_SIZE);
ResourceBarrier(g_dx_context->GetCommandList(), m_gpu_buffer.Get(),
D3D12_RESOURCE_STATE_COPY_SOURCE, D3D12_RESOURCE_STATE_UNORDERED_ACCESS);
Renderer::GetInstance()->ExecuteCommandList(true);

// Read back to cached values.
static constexpr D3D12_RANGE read_range = {0, BUFFER_SIZE};
void* mapped_pointer;
HRESULT hr = m_readback_buffer->Map(0, &read_range, &mapped_pointer);
CHECK(SUCCEEDED(hr), "Map bounding box CPU buffer");
if (FAILED(hr))
return;

static constexpr D3D12_RANGE write_range = {0, 0};
std::array<s32, NUM_VALUES> new_values;
std::memcpy(new_values.data(), mapped_pointer, BUFFER_SIZE);
m_readback_buffer->Unmap(0, &write_range);

// Preserve dirty values, that way we don't need to sync.
for (u32 i = 0; i < NUM_VALUES; i++)
{
if (!m_dirty[i])
m_values[i] = new_values[i];
}
m_valid = true;
}

// Returns the cached value at `index`, running Readback() first when the cache is not valid.
s32 BoundingBox::Get(size_t index)
{
  const bool cache_stale = !m_valid;
  if (cache_stale)
  {
    Readback();
  }

  return m_values[index];
}

// Stores `value` in the cache slot `index` and marks that slot dirty.
void BoundingBox::Set(size_t index, s32 value)
{
  m_dirty[index] = true;
  m_values[index] = value;
}

// Marks the cache as not valid and clears every dirty flag.
void BoundingBox::Invalidate()
{
  m_valid = false;
  m_dirty.fill(false);
}

// Uploads every dirty cached value to the GPU buffer. Adjacent dirty entries are merged into a
// single copy, and their dirty flags are cleared as each run is collected.
//
// The GPU buffer is moved to COPY_DEST before the first copy and always returned to
// UNORDERED_ACCESS before this function exits, including when upload space cannot be reserved
// after an earlier run was already recorded.
void BoundingBox::Flush()
{
  bool in_copy_state = false;
  for (u32 start = 0; start < NUM_VALUES;)
  {
    if (!m_dirty[start])
    {
      start++;
      continue;
    }

    // Extend the run over all consecutive dirty entries, clearing their flags as we go.
    u32 end = start + 1;
    m_dirty[start] = false;
    for (; end < NUM_VALUES; end++)
    {
      if (!m_dirty[end])
        break;

      m_dirty[end] = false;
    }

    const u32 copy_size = (end - start) * sizeof(ValueType);
    if (!m_upload_buffer.ReserveMemory(copy_size, sizeof(ValueType)))
    {
      // Out of stream buffer space: submit the current command list and retry once.
      WARN_LOG_FMT(VIDEO, "Executing command list while waiting for space in bbox stream buffer");
      Renderer::GetInstance()->ExecuteCommandList(false);
      if (!m_upload_buffer.ReserveMemory(copy_size, sizeof(ValueType)))
      {
        PanicAlertFmt("Failed to allocate bbox stream buffer space");
        // Leave the loop instead of returning, so that a transition to COPY_DEST made by an
        // earlier run is still undone by the barrier below.
        break;
      }
    }

    const u32 upload_buffer_offset = m_upload_buffer.GetCurrentOffset();
    std::memcpy(m_upload_buffer.GetCurrentHostPointer(), &m_values[start], copy_size);
    m_upload_buffer.CommitMemory(copy_size);

    // Transition only once, before the first copy is recorded.
    if (!in_copy_state)
    {
      ResourceBarrier(g_dx_context->GetCommandList(), m_gpu_buffer.Get(),
                      D3D12_RESOURCE_STATE_UNORDERED_ACCESS, D3D12_RESOURCE_STATE_COPY_DEST);
      in_copy_state = true;
    }

    g_dx_context->GetCommandList()->CopyBufferRegion(m_gpu_buffer.Get(), start * sizeof(ValueType),
                                                     m_upload_buffer.GetBuffer(),
                                                     upload_buffer_offset, copy_size);
    start = end;
  }

  // Restore the state the buffer was in before the first copy.
  if (in_copy_state)
  {
    ResourceBarrier(g_dx_context->GetCommandList(), m_gpu_buffer.Get(),
                    D3D12_RESOURCE_STATE_COPY_DEST, D3D12_RESOURCE_STATE_UNORDERED_ACCESS);
  }
}
}; // namespace DX12
@@ -2,47 +2,39 @@
// SPDX-License-Identifier: GPL-2.0-or-later

#pragma once

#include <memory>
#include "VideoBackends/D3D12/Common.h"
#include "VideoBackends/D3D12/D3D12StreamBuffer.h"
#include "VideoBackends/D3D12/DescriptorHeapManager.h"

#include "VideoCommon/BoundingBox.h"

namespace DX12
{
class BoundingBox
class D3D12BoundingBox final : public BoundingBox
{
public:
~BoundingBox();

static std::unique_ptr<BoundingBox> Create();
~D3D12BoundingBox() override;

const DescriptorHandle& GetGPUDescriptor() const { return m_gpu_descriptor; }
bool Initialize() override;

s32 Get(size_t index);
void Set(size_t index, s32 value);

void Invalidate();
void Flush();
protected:
std::vector<BBoxType> Read(u32 index, u32 length) override;
void Write(u32 index, const std::vector<BBoxType>& values) override;

private:
using ValueType = s32;
static const u32 NUM_VALUES = 4;
static const u32 BUFFER_SIZE = sizeof(ValueType) * NUM_VALUES;
static const u32 MAX_UPDATES_PER_FRAME = 128;
static const u32 STREAM_BUFFER_SIZE = BUFFER_SIZE * MAX_UPDATES_PER_FRAME;

BoundingBox();
static constexpr u32 BUFFER_SIZE = sizeof(BBoxType) * NUM_BBOX_VALUES;
static constexpr u32 MAX_UPDATES_PER_FRAME = 128;
static constexpr u32 STREAM_BUFFER_SIZE = BUFFER_SIZE * MAX_UPDATES_PER_FRAME;

bool CreateBuffers();
void Readback();

// Three buffers: GPU for read/write, CPU for reading back, and CPU for staging changes.
ComPtr<ID3D12Resource> m_gpu_buffer;
ComPtr<ID3D12Resource> m_readback_buffer;
StreamBuffer m_upload_buffer;
DescriptorHandle m_gpu_descriptor;
std::array<ValueType, NUM_VALUES> m_values = {};
std::array<bool, NUM_VALUES> m_dirty = {};
bool m_valid = true;
};
}; // namespace DX12

} // namespace DX12
@@ -46,17 +46,11 @@ bool Renderer::Initialize()
if (!::Renderer::Initialize())
return false;

m_bounding_box = BoundingBox::Create();
if (!m_bounding_box)
return false;

SetPixelShaderUAV(m_bounding_box->GetGPUDescriptor().cpu_handle);
return true;
}

void Renderer::Shutdown()
{
m_bounding_box.reset();
m_swap_chain.reset();

::Renderer::Shutdown();
@@ -107,20 +101,9 @@ std::unique_ptr<AbstractPipeline> Renderer::CreatePipeline(const AbstractPipelin
return DXPipeline::Create(config, cache_data, cache_data_length);
}

// Fetches the bounding box value at `index` and converts it to u16.
u16 Renderer::BBoxReadImpl(int index)
{
  const s32 value = m_bounding_box->Get(index);
  return static_cast<u16>(value);
}

void Renderer::BBoxWriteImpl(int index, u16 value)
{
m_bounding_box->Set(index, value);
}

void Renderer::BBoxFlushImpl()
std::unique_ptr<BoundingBox> Renderer::CreateBoundingBox() const
{
m_bounding_box->Flush();
m_bounding_box->Invalidate();
return std::make_unique<D3D12BoundingBox>();
}

void Renderer::Flush()
@@ -8,9 +8,10 @@
#include "VideoBackends/D3D12/DescriptorHeapManager.h"
#include "VideoCommon/RenderBase.h"

class BoundingBox;

namespace DX12
{
class BoundingBox;
class DXFramebuffer;
class DXTexture;
class DXShader;
@@ -48,10 +49,6 @@ class Renderer final : public ::Renderer
const void* cache_data = nullptr,
size_t cache_data_length = 0) override;

u16 BBoxReadImpl(int index) override;
void BBoxWriteImpl(int index, u16 value) override;
void BBoxFlushImpl() override;

void Flush() override;
void WaitForGPUIdle() override;

@@ -100,6 +97,8 @@ class Renderer final : public ::Renderer
protected:
void OnConfigChanged(u32 bits) override;

std::unique_ptr<BoundingBox> CreateBoundingBox() const override;

private:
static const u32 MAX_TEXTURES = 8;
static const u32 NUM_CONSTANT_BUFFERS = 3;
@@ -150,7 +149,6 @@ class Renderer final : public ::Renderer

// Owned objects
std::unique_ptr<SwapChain> m_swap_chain;
std::unique_ptr<BoundingBox> m_bounding_box;

// Current state
struct
@@ -1,5 +1,6 @@
add_library(videonull
NullBackend.cpp
NullBoundingBox.h
NullRender.cpp
NullRender.h
NullTexture.cpp
@@ -0,0 +1,25 @@
// Copyright 2021 Dolphin Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later

#pragma once

#include "Common/CommonTypes.h"

#include "VideoCommon/BoundingBox.h"

namespace Null
{
// Bounding box implementation for the Null backend: it stores nothing, reads return
// value-initialized entries and writes are discarded.
class NullBoundingBox final : public BoundingBox
{
public:
  // Nothing to set up; always reports success.
  bool Initialize() override { return true; }

protected:
  // Returns `length` value-initialized entries; `index` is not used.
  std::vector<BBoxType> Read(u32 index, u32 length) override
  {
    std::vector<BBoxType> values(length);
    return values;
  }

  // Intentionally does nothing with `index` and `values`.
  void Write(u32 index, const std::vector<BBoxType>& values) override {}
};

} // namespace Null
@@ -3,6 +3,7 @@

#include "VideoBackends/Null/NullRender.h"

#include "VideoBackends/Null/NullBoundingBox.h"
#include "VideoBackends/Null/NullTexture.h"

#include "VideoCommon/AbstractPipeline.h"
@@ -83,4 +84,9 @@ Renderer::CreateNativeVertexFormat(const PortableVertexDeclaration& vtx_decl)
{
return std::make_unique<NativeVertexFormat>(vtx_decl);
}

std::unique_ptr<BoundingBox> Renderer::CreateBoundingBox() const
{
return std::make_unique<NullBoundingBox>();
}
} // namespace Null
@@ -5,6 +5,8 @@

#include "VideoCommon/RenderBase.h"

class BoundingBox;

namespace Null
{
class Renderer final : public ::Renderer
@@ -35,14 +37,15 @@ class Renderer final : public ::Renderer

u32 AccessEFB(EFBAccessType type, u32 x, u32 y, u32 poke_data) override { return 0; }
void PokeEFB(EFBAccessType type, const EfbPokeData* points, size_t num_points) override {}
u16 BBoxReadImpl(int index) override { return 0; }
void BBoxWriteImpl(int index, u16 value) override {}

void ClearScreen(const MathUtil::Rectangle<int>& rc, bool colorEnable, bool alphaEnable,
bool zEnable, u32 color, u32 z) override
{
}

void ReinterpretPixelData(EFBReinterpretType convtype) override {}

protected:
std::unique_ptr<BoundingBox> CreateBoundingBox() const override;
};
} // namespace Null
@@ -1,91 +1,35 @@
// Copyright 2014 Dolphin Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later

#include <algorithm>
#include <array>
#include <cstring>

#include "Common/GL/GLUtil.h"

#include "VideoBackends/OGL/OGLBoundingBox.h"
#include "VideoBackends/OGL/OGLRender.h"

#include "VideoBackends/OGL/OGLRender.h"
#include "VideoCommon/DriverDetails.h"
#include "VideoCommon/VideoConfig.h"

enum : u32
{
NUM_BBOX_VALUES = 4,
};

static GLuint s_bbox_buffer_id;
static std::array<s32, NUM_BBOX_VALUES> s_bbox_values;
static std::array<bool, NUM_BBOX_VALUES> s_bbox_dirty;
static bool s_bbox_valid = false;

namespace OGL
{
void BoundingBox::Init()
{
if (!g_ActiveConfig.backend_info.bSupportsBBox)
return;

const s32 initial_values[NUM_BBOX_VALUES] = {0, 0, 0, 0};
std::memcpy(s_bbox_values.data(), initial_values, sizeof(s_bbox_values));
s_bbox_dirty = {};
s_bbox_valid = true;

glGenBuffers(1, &s_bbox_buffer_id);
glBindBuffer(GL_SHADER_STORAGE_BUFFER, s_bbox_buffer_id);
glBufferData(GL_SHADER_STORAGE_BUFFER, sizeof(initial_values), initial_values, GL_DYNAMIC_DRAW);
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, s_bbox_buffer_id);
}

void BoundingBox::Shutdown()
OGLBoundingBox::~OGLBoundingBox()
{
if (!g_ActiveConfig.backend_info.bSupportsBBox)
return;

glDeleteBuffers(1, &s_bbox_buffer_id);
if (m_buffer_id)
glDeleteBuffers(1, &m_buffer_id);
}

void BoundingBox::Flush()
bool OGLBoundingBox::Initialize()
{
s_bbox_valid = false;
const BBoxType initial_values[NUM_BBOX_VALUES] = {0, 0, 0, 0};

if (std::none_of(s_bbox_dirty.begin(), s_bbox_dirty.end(), [](bool dirty) { return dirty; }))
return;

glBindBuffer(GL_SHADER_STORAGE_BUFFER, s_bbox_buffer_id);

for (u32 start = 0; start < NUM_BBOX_VALUES;)
{
if (!s_bbox_dirty[start])
{
start++;
continue;
}

u32 end = start + 1;
s_bbox_dirty[start] = false;
for (; end < NUM_BBOX_VALUES; end++)
{
if (!s_bbox_dirty[end])
break;

s_bbox_dirty[end] = false;
}

glBufferSubData(GL_SHADER_STORAGE_BUFFER, start * sizeof(s32), (end - start) * sizeof(s32),
&s_bbox_values[start]);
}
glGenBuffers(1, &m_buffer_id);
glBindBuffer(GL_SHADER_STORAGE_BUFFER, m_buffer_id);
glBufferData(GL_SHADER_STORAGE_BUFFER, sizeof(initial_values), initial_values, GL_DYNAMIC_DRAW);
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, m_buffer_id);

glBindBuffer(GL_SHADER_STORAGE_BUFFER, 0);
return true;
}

void BoundingBox::Readback()
std::vector<BBoxType> OGLBoundingBox::Read(u32 index, u32 length)
{
glBindBuffer(GL_SHADER_STORAGE_BUFFER, s_bbox_buffer_id);
std::vector<BBoxType> values(length);
glBindBuffer(GL_SHADER_STORAGE_BUFFER, m_buffer_id);

// Using glMapBufferRange to read back the contents of the SSBO is extremely slow
// on nVidia drivers. This is more noticeable at higher internal resolutions.
@@ -101,52 +45,33 @@ void BoundingBox::Readback()
// explain why it needs the cache invalidate.
glMemoryBarrier(GL_SHADER_STORAGE_BARRIER_BIT);

std::array<s32, NUM_BBOX_VALUES> gpu_values;
glGetBufferSubData(GL_SHADER_STORAGE_BUFFER, 0, sizeof(s32) * NUM_BBOX_VALUES,
gpu_values.data());
for (u32 i = 0; i < NUM_BBOX_VALUES; i++)
{
if (!s_bbox_dirty[i])
s_bbox_values[i] = gpu_values[i];
}
glGetBufferSubData(GL_SHADER_STORAGE_BUFFER, sizeof(BBoxType) * index,
sizeof(BBoxType) * length, values.data());
}
else
{
// Using glMapBufferRange is faster on AMD cards by a measurable margin.
void* ptr = glMapBufferRange(GL_SHADER_STORAGE_BUFFER, 0, sizeof(s32) * NUM_BBOX_VALUES,
void* ptr = glMapBufferRange(GL_SHADER_STORAGE_BUFFER, 0, sizeof(BBoxType) * NUM_BBOX_VALUES,
GL_MAP_READ_BIT);
if (ptr)
{
for (u32 i = 0; i < NUM_BBOX_VALUES; i++)
{
if (!s_bbox_dirty[i])
{
std::memcpy(&s_bbox_values[i], reinterpret_cast<const u8*>(ptr) + sizeof(s32) * i,
sizeof(s32));
}
}
std::memcpy(values.data(), reinterpret_cast<const u8*>(ptr) + sizeof(BBoxType) * index,
sizeof(BBoxType) * length);

glUnmapBuffer(GL_SHADER_STORAGE_BUFFER);
}
}

glBindBuffer(GL_SHADER_STORAGE_BUFFER, 0);
s_bbox_valid = true;
return values;
}

void BoundingBox::Set(int index, int value)
void OGLBoundingBox::Write(u32 index, const std::vector<BBoxType>& values)
{
if (s_bbox_valid && s_bbox_values[index] == value)
return;

s_bbox_values[index] = value;
s_bbox_dirty[index] = true;
glBindBuffer(GL_SHADER_STORAGE_BUFFER, m_buffer_id);
glBufferSubData(GL_SHADER_STORAGE_BUFFER, sizeof(BBoxType) * index,
sizeof(BBoxType) * values.size(), values.data());
glBindBuffer(GL_SHADER_STORAGE_BUFFER, 0);
}

// Returns the cached value at `index`, running Readback() first when the cache is not valid.
int BoundingBox::Get(int index)
{
  const bool cache_stale = !s_bbox_valid;
  if (cache_stale)
  {
    Readback();
  }

  return s_bbox_values[index];
}
}; // namespace OGL
} // namespace OGL
@@ -3,18 +3,26 @@

#pragma once

#include "Common/CommonTypes.h"
#include "Common/GL/GLUtil.h"

#include "VideoCommon/BoundingBox.h"

namespace OGL
{
class BoundingBox
class OGLBoundingBox final : public BoundingBox
{
public:
static void Init();
static void Shutdown();
~OGLBoundingBox() override;

static void Flush();
static void Readback();
bool Initialize() override;

static void Set(int index, int value);
static int Get(int index);
protected:
std::vector<BBoxType> Read(u32 index, u32 length) override;
void Write(u32 index, const std::vector<BBoxType>& values) override;

private:
GLuint m_buffer_id = 0;
};
}; // namespace OGL

} // namespace OGL
@@ -44,7 +44,6 @@ Make AA apply instantly during gameplay if possible

#include "Core/Config/GraphicsSettings.h"

#include "VideoBackends/OGL/OGLBoundingBox.h"
#include "VideoBackends/OGL/OGLPerfQuery.h"
#include "VideoBackends/OGL/OGLRender.h"
#include "VideoBackends/OGL/OGLVertexManager.h"
@@ -186,7 +185,6 @@ bool VideoBackend::Initialize(const WindowSystemInfo& wsi)
g_perf_query = GetPerfQuery();
g_texture_cache = std::make_unique<TextureCacheBase>();
g_sampler_cache = std::make_unique<SamplerCache>();
BoundingBox::Init();

if (!g_vertex_manager->Initialize() || !g_shader_cache->Initialize() ||
!g_renderer->Initialize() || !g_framebuffer_manager->Initialize() ||
@@ -205,7 +203,6 @@ void VideoBackend::Shutdown()
{
g_shader_cache->Shutdown();
g_renderer->Shutdown();
BoundingBox::Shutdown();
g_sampler_cache.reset();
g_texture_cache.reset();
g_perf_query.reset();
@@ -851,19 +851,9 @@ void Renderer::SetScissorRect(const MathUtil::Rectangle<int>& rc)
glScissor(rc.left, rc.top, rc.GetWidth(), rc.GetHeight());
}

u16 Renderer::BBoxReadImpl(int index)
std::unique_ptr<::BoundingBox> Renderer::CreateBoundingBox() const
{
return static_cast<u16>(BoundingBox::Get(index));
}

void Renderer::BBoxWriteImpl(int index, u16 value)
{
BoundingBox::Set(index, value);
}

void Renderer::BBoxFlushImpl()
{
BoundingBox::Flush();
return std::make_unique<OGLBoundingBox>();
}

void Renderer::SetViewport(float x, float y, float width, float height, float near_depth,
@@ -11,6 +11,8 @@
#include "Common/GL/GLExtensions/GLExtensions.h"
#include "VideoCommon/RenderBase.h"

class BoundingBox;

namespace OGL
{
class OGLFramebuffer;
@@ -128,10 +130,6 @@ class Renderer : public ::Renderer
void BindBackbuffer(const ClearColor& clear_color = {}) override;
void PresentBackbuffer() override;

u16 BBoxReadImpl(int index) override;
void BBoxWriteImpl(int index, u16 value) override;
void BBoxFlushImpl() override;

void BeginUtilityDrawing() override;
void EndUtilityDrawing() override;

@@ -164,6 +162,9 @@ class Renderer : public ::Renderer
// Restores FBO binding after it's been changed.
void RestoreFramebufferBinding();

protected:
std::unique_ptr<BoundingBox> CreateBoundingBox() const override;

private:
void CheckForSurfaceChange();
void CheckForSurfaceResize();
@@ -14,6 +14,8 @@ add_library(videosoftware
SetupUnit.cpp
SetupUnit.h
SWmain.cpp
SWBoundingBox.cpp
SWBoundingBox.h
SWOGLWindow.cpp
SWOGLWindow.h
SWRenderer.cpp
@@ -0,0 +1,66 @@
// Copyright 2021 Dolphin Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later

#include "VideoBackends/Software/SWBoundingBox.h"

#include <algorithm>
#include <array>

#include "Common/CommonTypes.h"

namespace BBoxManager
{
namespace
{
// Current bounding box coordinates.
std::array<u16, 4> s_coordinates{};
} // Anonymous namespace

// Returns the stored value for `coordinate`; the enumerator's integer value is the array slot.
u16 GetCoordinate(Coordinate coordinate)
{
  const auto slot = static_cast<u32>(coordinate);
  return s_coordinates[slot];
}

// Overwrites the stored value for `coordinate`; the enumerator's integer value is the array slot.
void SetCoordinate(Coordinate coordinate, u16 value)
{
  const auto slot = static_cast<u32>(coordinate);
  s_coordinates[slot] = value;
}

// Grows the stored bounding box so that it also covers the given extents: left/top keep the
// smaller of the stored and supplied value, right/bottom keep the larger.
void Update(u16 left, u16 right, u16 top, u16 bottom)
{
  // Each coordinate is independent of the others, so they can be updated one at a time.
  SetCoordinate(Coordinate::Left, std::min(left, GetCoordinate(Coordinate::Left)));
  SetCoordinate(Coordinate::Right, std::max(right, GetCoordinate(Coordinate::Right)));
  SetCoordinate(Coordinate::Top, std::min(top, GetCoordinate(Coordinate::Top)));
  SetCoordinate(Coordinate::Bottom, std::max(bottom, GetCoordinate(Coordinate::Bottom)));
}

} // namespace BBoxManager

namespace SW
{
// Returns `length` values read from BBoxManager, starting at the coordinate whose integer value
// is `index`.
std::vector<BBoxType> SWBoundingBox::Read(u32 index, u32 length)
{
  std::vector<BBoxType> values;
  values.reserve(length);

  for (u32 offset = 0; offset < length; ++offset)
  {
    const auto coordinate = static_cast<BBoxManager::Coordinate>(index + offset);
    values.push_back(BBoxManager::GetCoordinate(coordinate));
  }

  return values;
}

// Stores each entry of `values` into BBoxManager, starting at the coordinate whose integer value
// is `index`.
void SWBoundingBox::Write(u32 index, const std::vector<BBoxType>& values)
{
  size_t slot = index;
  for (const BBoxType value : values)
  {
    BBoxManager::SetCoordinate(static_cast<BBoxManager::Coordinate>(slot), value);
    ++slot;
  }
}

} // namespace SW
@@ -0,0 +1,43 @@
// Copyright 2021 Dolphin Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later

#pragma once

#include "Common/CommonTypes.h"

#include "VideoCommon/BoundingBox.h"

namespace BBoxManager
{
// Identifies one coordinate of the bounding box. The enumerator values are spelled out because
// they are converted to and from integer indices.
enum class Coordinate
{
  // The X coordinate of the left side of the bounding box.
  Left = 0,
  // The X coordinate of the right side of the bounding box.
  Right = 1,
  // The Y coordinate of the top of the bounding box.
  Top = 2,
  // The Y coordinate of the bottom of the bounding box.
  Bottom = 3,
};

// Gets a particular coordinate for the bounding box.
u16 GetCoordinate(Coordinate coordinate);

// Sets a particular coordinate for the bounding box.
void SetCoordinate(Coordinate coordinate, u16 value);

// Updates all bounding box coordinates.
void Update(u16 left, u16 right, u16 top, u16 bottom);
} // namespace BBoxManager

namespace SW
{
// Bounding box implementation for the software backend. Read and Write are defined out of line
// and go through the BBoxManager coordinate accessors.
class SWBoundingBox final : public BoundingBox
{
public:
// No setup is performed here; always returns true.
bool Initialize() override { return true; }

protected:
// Returns `length` values starting at element `index`.
std::vector<BBoxType> Read(u32 index, u32 length) override;
// Stores `values` starting at element `index`.
void Write(u32 index, const std::vector<BBoxType>& values) override;
};

} // namespace SW
@@ -12,13 +12,13 @@

#include "VideoBackends/Software/EfbCopy.h"
#include "VideoBackends/Software/EfbInterface.h"
#include "VideoBackends/Software/SWBoundingBox.h"
#include "VideoBackends/Software/SWOGLWindow.h"
#include "VideoBackends/Software/SWTexture.h"

#include "VideoCommon/AbstractPipeline.h"
#include "VideoCommon/AbstractShader.h"
#include "VideoCommon/AbstractTexture.h"
#include "VideoCommon/BoundingBox.h"
#include "VideoCommon/NativeVertexFormat.h"
#include "VideoCommon/VideoBackendBase.h"
#include "VideoCommon/VideoCommon.h"
@@ -141,14 +141,9 @@ u32 SWRenderer::AccessEFB(EFBAccessType type, u32 x, u32 y, u32 InputData)
return value;
}

u16 SWRenderer::BBoxReadImpl(int index)
std::unique_ptr<BoundingBox> SWRenderer::CreateBoundingBox() const
{
return BoundingBox::GetCoordinate(static_cast<BoundingBox::Coordinate>(index));
}

void SWRenderer::BBoxWriteImpl(int index, u16 value)
{
BoundingBox::SetCoordinate(static_cast<BoundingBox::Coordinate>(index), value);
return std::make_unique<SWBoundingBox>();
}

void SWRenderer::ClearScreen(const MathUtil::Rectangle<int>& rc, bool colorEnable, bool alphaEnable,
@@ -10,6 +10,7 @@

#include "VideoCommon/RenderBase.h"

class BoundingBox;
class SWOGLWindow;

namespace SW
@@ -43,8 +44,6 @@ class SWRenderer final : public Renderer

u32 AccessEFB(EFBAccessType type, u32 x, u32 y, u32 poke_data) override;
void PokeEFB(EFBAccessType type, const EfbPokeData* points, size_t num_points) override {}
u16 BBoxReadImpl(int index) override;
void BBoxWriteImpl(int index, u16 value) override;

void RenderXFBToScreen(const MathUtil::Rectangle<int>& target_rc,
const AbstractTexture* source_texture,
@@ -59,6 +58,9 @@ class SWRenderer final : public Renderer
const AbstractTexture* src_texture,
const MathUtil::Rectangle<int>& src_rect) override;

protected:
std::unique_ptr<BoundingBox> CreateBoundingBox() const override;

private:
std::unique_ptr<SWOGLWindow> m_window;
};
@@ -53,6 +53,10 @@ void SWVertexLoader::DrawCurrentBatch(u32 base_index, u32 num_indices, u32 base_
break;
}

// Flush bounding box here because software overrides the base function
if (g_renderer->IsBBoxEnabled())
g_renderer->BBoxFlush();

m_setup_unit.Init(primitiveType);

// set all states which are stored within video sw
@@ -83,6 +83,7 @@ void VideoSoftware::InitBackendInfo()
g_Config.backend_info.bSupportsLogicOp = true;
g_Config.backend_info.bSupportsShaderBinaries = false;
g_Config.backend_info.bSupportsPipelineCacheData = false;
g_Config.backend_info.bSupportsBBox = true;

// aamodes
g_Config.backend_info.AAModes = {1};
@@ -11,9 +11,9 @@
#include "Common/CommonTypes.h"
#include "VideoBackends/Software/DebugUtil.h"
#include "VideoBackends/Software/EfbInterface.h"
#include "VideoBackends/Software/SWBoundingBox.h"
#include "VideoBackends/Software/TextureSampler.h"

#include "VideoCommon/BoundingBox.h"
#include "VideoCommon/PerfQueryBase.h"
#include "VideoCommon/PixelShaderManager.h"
#include "VideoCommon/Statistics.h"
@@ -839,7 +839,7 @@ void Tev::Draw()

// The GC/Wii GPU rasterizes in 2x2 pixel groups, so bounding box values will be rounded to the
// extents of these groups, rather than the exact pixel.
BoundingBox::Update(static_cast<u16>(Position[0] & ~1), static_cast<u16>(Position[0] | 1),
BBoxManager::Update(static_cast<u16>(Position[0] & ~1), static_cast<u16>(Position[0] | 1),
static_cast<u16>(Position[1] & ~1), static_cast<u16>(Position[1] | 1));

#if ALLOW_TEV_DUMPS
@@ -3,7 +3,6 @@

#include <vector>

#include "Common/Assert.h"
#include "Common/Logging/Log.h"

#include "VideoBackends/Vulkan/CommandBufferManager.h"
@@ -16,11 +15,7 @@

namespace Vulkan
{
// Nothing to do at construction time.
BoundingBox::BoundingBox() = default;

BoundingBox::~BoundingBox()
VKBoundingBox::~VKBoundingBox()
{
if (m_gpu_buffer != VK_NULL_HANDLE)
{
@@ -29,14 +24,8 @@ BoundingBox::~BoundingBox()
}
}

bool BoundingBox::Initialize()
bool VKBoundingBox::Initialize()
{
if (!g_ActiveConfig.backend_info.bSupportsBBox)
{
WARN_LOG_FMT(VIDEO, "Vulkan: Bounding box is unsupported by your device.");
return true;
}

if (!CreateGPUBuffer())
return false;

@@ -48,103 +37,71 @@ bool BoundingBox::Initialize()
return true;
}

void BoundingBox::Flush()
std::vector<BBoxType> VKBoundingBox::Read(u32 index, u32 length)
{
if (m_gpu_buffer == VK_NULL_HANDLE)
return;

// Combine updates together, chances are the game would have written all 4.
bool updated_buffer = false;
for (size_t start = 0; start < 4; start++)
{
if (!m_values_dirty[start])
continue;

size_t count = 0;
std::array<s32, 4> write_values;
for (; (start + count) < 4; count++)
{
if (!m_values_dirty[start + count])
break;

m_readback_buffer->Read((start + count) * sizeof(s32), &write_values[count], sizeof(s32),
false);
m_values_dirty[start + count] = false;
}

// We can't issue vkCmdUpdateBuffer within a render pass.
// However, the writes must be serialized, so we can't put it in the init buffer.
if (!updated_buffer)
{
StateTracker::GetInstance()->EndRenderPass();

// Ensure GPU buffer is in a state where it can be transferred to.
StagingBuffer::BufferMemoryBarrier(
g_command_buffer_mgr->GetCurrentCommandBuffer(), m_gpu_buffer,
VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_TRANSFER_WRITE_BIT, 0,
BUFFER_SIZE, VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT);

updated_buffer = true;
}

vkCmdUpdateBuffer(g_command_buffer_mgr->GetCurrentCommandBuffer(), m_gpu_buffer,
start * sizeof(s32), count * sizeof(s32),
reinterpret_cast<const u32*>(write_values.data()));
}

// Restore fragment shader access to the buffer.
if (updated_buffer)
{
StagingBuffer::BufferMemoryBarrier(
g_command_buffer_mgr->GetCurrentCommandBuffer(), m_gpu_buffer, VK_ACCESS_TRANSFER_WRITE_BIT,
VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT, 0, BUFFER_SIZE,
VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT);
}
// Can't be done within a render pass.
StateTracker::GetInstance()->EndRenderPass();

// We're now up-to-date.
m_valid = true;
}
// Ensure all writes are completed to the GPU buffer prior to the transfer.
StagingBuffer::BufferMemoryBarrier(
g_command_buffer_mgr->GetCurrentCommandBuffer(), m_gpu_buffer,
VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_TRANSFER_READ_BIT, 0,
BUFFER_SIZE, VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT);
m_readback_buffer->PrepareForGPUWrite(g_command_buffer_mgr->GetCurrentCommandBuffer(),
VK_ACCESS_TRANSFER_WRITE_BIT,
VK_PIPELINE_STAGE_TRANSFER_BIT);

void BoundingBox::Invalidate()
{
if (m_gpu_buffer == VK_NULL_HANDLE)
return;
// Copy from GPU -> readback buffer.
VkBufferCopy region = {0, 0, BUFFER_SIZE};
vkCmdCopyBuffer(g_command_buffer_mgr->GetCurrentCommandBuffer(), m_gpu_buffer,
m_readback_buffer->GetBuffer(), 1, &region);

m_valid = false;
}
// Restore GPU buffer access.
StagingBuffer::BufferMemoryBarrier(
g_command_buffer_mgr->GetCurrentCommandBuffer(), m_gpu_buffer, VK_ACCESS_TRANSFER_READ_BIT,
VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT, 0, BUFFER_SIZE,
VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT);
m_readback_buffer->FlushGPUCache(g_command_buffer_mgr->GetCurrentCommandBuffer(),
VK_ACCESS_TRANSFER_WRITE_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT);

s32 BoundingBox::Get(size_t index)
{
ASSERT(index < NUM_VALUES);
// Wait until these commands complete.
Renderer::GetInstance()->ExecuteCommandBuffer(false, true);

if (!m_valid)
Readback();
// Cache is now valid.
m_readback_buffer->InvalidateCPUCache();

s32 value;
m_readback_buffer->Read(index * sizeof(s32), &value, sizeof(value), false);
return value;
// Read out the values and return
std::vector<BBoxType> values(length);
m_readback_buffer->Read(index * sizeof(BBoxType), values.data(), length * sizeof(BBoxType),
false);
return values;
}

void BoundingBox::Set(size_t index, s32 value)
void VKBoundingBox::Write(u32 index, const std::vector<BBoxType>& values)
{
ASSERT(index < NUM_VALUES);
// We can't issue vkCmdUpdateBuffer within a render pass.
// However, the writes must be serialized, so we can't put it in the init buffer.
StateTracker::GetInstance()->EndRenderPass();

// If we're currently valid, update the stored value in both our cache and the GPU buffer.
if (m_valid)
{
// Skip when it hasn't changed.
s32 current_value;
m_readback_buffer->Read(index * sizeof(s32), &current_value, sizeof(current_value), false);
if (current_value == value)
return;
}
// Ensure GPU buffer is in a state where it can be transferred to.
StagingBuffer::BufferMemoryBarrier(
g_command_buffer_mgr->GetCurrentCommandBuffer(), m_gpu_buffer,
VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_TRANSFER_WRITE_BIT, 0,
BUFFER_SIZE, VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT);

// Write the values to the GPU buffer
vkCmdUpdateBuffer(g_command_buffer_mgr->GetCurrentCommandBuffer(), m_gpu_buffer,
index * sizeof(BBoxType), values.size() * sizeof(BBoxType),
reinterpret_cast<const BBoxType*>(values.data()));

// Flag as dirty, and update values.
m_readback_buffer->Write(index * sizeof(s32), &value, sizeof(value), true);
m_values_dirty[index] = true;
// Restore fragment shader access to the buffer.
StagingBuffer::BufferMemoryBarrier(
g_command_buffer_mgr->GetCurrentCommandBuffer(), m_gpu_buffer, VK_ACCESS_TRANSFER_WRITE_BIT,
VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT, 0, BUFFER_SIZE,
VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT);
}

bool BoundingBox::CreateGPUBuffer()
bool VKBoundingBox::CreateGPUBuffer()
{
VkBufferUsageFlags buffer_usage = VK_BUFFER_USAGE_STORAGE_BUFFER_BIT |
VK_BUFFER_USAGE_TRANSFER_SRC_BIT |
@@ -204,7 +161,7 @@ bool BoundingBox::CreateGPUBuffer()
return true;
}

bool BoundingBox::CreateReadbackBuffer()
bool VKBoundingBox::CreateReadbackBuffer()
{
m_readback_buffer = StagingBuffer::Create(STAGING_BUFFER_TYPE_READBACK, BUFFER_SIZE,
VK_BUFFER_USAGE_TRANSFER_DST_BIT);
@@ -215,39 +172,4 @@ bool BoundingBox::CreateReadbackBuffer()
return true;
}

void BoundingBox::Readback()
{
// Can't be done within a render pass.
StateTracker::GetInstance()->EndRenderPass();

// Ensure all writes are completed to the GPU buffer prior to the transfer.
StagingBuffer::BufferMemoryBarrier(
g_command_buffer_mgr->GetCurrentCommandBuffer(), m_gpu_buffer,
VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_TRANSFER_READ_BIT, 0,
BUFFER_SIZE, VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT);
m_readback_buffer->PrepareForGPUWrite(g_command_buffer_mgr->GetCurrentCommandBuffer(),
VK_ACCESS_TRANSFER_WRITE_BIT,
VK_PIPELINE_STAGE_TRANSFER_BIT);

// Copy from GPU -> readback buffer.
VkBufferCopy region = {0, 0, BUFFER_SIZE};
vkCmdCopyBuffer(g_command_buffer_mgr->GetCurrentCommandBuffer(), m_gpu_buffer,
m_readback_buffer->GetBuffer(), 1, &region);

// Restore GPU buffer access.
StagingBuffer::BufferMemoryBarrier(
g_command_buffer_mgr->GetCurrentCommandBuffer(), m_gpu_buffer, VK_ACCESS_TRANSFER_READ_BIT,
VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT, 0, BUFFER_SIZE,
VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT);
m_readback_buffer->FlushGPUCache(g_command_buffer_mgr->GetCurrentCommandBuffer(),
VK_ACCESS_TRANSFER_WRITE_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT);

// Wait until these commands complete.
Renderer::GetInstance()->ExecuteCommandBuffer(false, true);

// Cache is now valid.
m_readback_buffer->InvalidateCPUCache();
m_valid = true;
}

} // namespace Vulkan
@@ -8,41 +8,35 @@
#include <string>

#include "Common/CommonTypes.h"

#include "VideoBackends/Vulkan/VulkanLoader.h"

#include "VideoCommon/BoundingBox.h"

namespace Vulkan
{
class StagingBuffer;

// Vulkan bounding box storage: a GPU buffer (m_gpu_buffer / m_gpu_memory) paired with a
// StagingBuffer (m_readback_buffer) used to bring values back to the CPU.
// NOTE(review): this span carries both the older `BoundingBox` declarations (Get/Set/
// Invalidate/Flush, NUM_VALUES, m_values_dirty, m_valid) and the `VKBoundingBox` ones that
// override the common BoundingBox interface — confirm which set is meant to remain.
class BoundingBox
class VKBoundingBox final : public BoundingBox
{
public:
BoundingBox();
~BoundingBox();

bool Initialize();
~VKBoundingBox() override;

s32 Get(size_t index);
void Set(size_t index, s32 value);
bool Initialize() override;

void Invalidate();
void Flush();
protected:
// Backend hooks declared by the common BoundingBox base class.
std::vector<BBoxType> Read(u32 index, u32 length) override;
void Write(u32 index, const std::vector<BBoxType>& values) override;

private:
// Resource creation helpers; each returns false on failure.
bool CreateGPUBuffer();
bool CreateReadbackBuffer();
// Copies the GPU buffer into m_readback_buffer and waits for the copy to finish.
void Readback();

// GPU-side buffer and its device memory.
VkBuffer m_gpu_buffer = VK_NULL_HANDLE;
VkDeviceMemory m_gpu_memory = VK_NULL_HANDLE;

// Size in bytes of the GPU and readback buffers.
static const size_t NUM_VALUES = 4;
static const size_t BUFFER_SIZE = sizeof(u32) * NUM_VALUES;
static constexpr size_t BUFFER_SIZE = sizeof(BBoxType) * NUM_BBOX_VALUES;

// CPU-visible staging buffer that receives copies of m_gpu_buffer.
std::unique_ptr<StagingBuffer> m_readback_buffer;
std::array<bool, NUM_VALUES> m_values_dirty = {};
// Set to true by Readback() once m_readback_buffer holds current data.
bool m_valid = true;
};

} // namespace Vulkan
@@ -17,6 +17,7 @@

#include "VideoBackends/Vulkan/CommandBufferManager.h"
#include "VideoBackends/Vulkan/ObjectCache.h"
#include "VideoBackends/Vulkan/StagingBuffer.h"
#include "VideoBackends/Vulkan/StateTracker.h"
#include "VideoBackends/Vulkan/VKBoundingBox.h"
#include "VideoBackends/Vulkan/VKPerfQuery.h"
@@ -63,13 +64,6 @@ bool Renderer::Initialize()
if (!::Renderer::Initialize())
return false;

m_bounding_box = std::make_unique<BoundingBox>();
if (!m_bounding_box->Initialize())
{
PanicAlertFmt("Failed to initialize bounding box.");
return false;
}

// Various initialization routines will have executed commands on the command buffer.
// Execute what we have done before beginning the first frame.
ExecuteCommandBuffer(true, false);
@@ -132,20 +126,9 @@ void Renderer::SetPipeline(const AbstractPipeline* pipeline)
StateTracker::GetInstance()->SetPipeline(static_cast<const VKPipeline*>(pipeline));
}

// Fetches one bounding box value from the backend bounding box object and narrows it to the
// 16-bit type the caller expects.
u16 Renderer::BBoxReadImpl(int index)
{
  const auto raw_value = m_bounding_box->Get(index);
  return static_cast<u16>(raw_value);
}

// Stores one bounding box value by forwarding to the backend bounding box object.
void Renderer::BBoxWriteImpl(int index, u16 value)
{
  auto* const bbox = m_bounding_box.get();
  bbox->Set(index, value);
}

void Renderer::BBoxFlushImpl()
std::unique_ptr<BoundingBox> Renderer::CreateBoundingBox() const
{
m_bounding_box->Flush();
m_bounding_box->Invalidate();
return std::make_unique<VKBoundingBox>();
}

void Renderer::ClearScreen(const MathUtil::Rectangle<int>& rc, bool color_enable, bool alpha_enable,
@@ -12,11 +12,11 @@
#include "VideoBackends/Vulkan/Constants.h"
#include "VideoCommon/RenderBase.h"

class BoundingBox;
struct XFBSourceBase;

namespace Vulkan
{
class BoundingBox;
class SwapChain;
class StagingTexture2D;
class VKFramebuffer;
@@ -55,10 +55,6 @@ class Renderer : public ::Renderer
size_t cache_data_length = 0) override;

SwapChain* GetSwapChain() const { return m_swap_chain.get(); }
BoundingBox* GetBoundingBox() const { return m_bounding_box.get(); }
u16 BBoxReadImpl(int index) override;
void BBoxWriteImpl(int index, u16 value) override;
void BBoxFlushImpl() override;

void Flush() override;
void WaitForGPUIdle() override;
@@ -92,6 +88,9 @@ class Renderer : public ::Renderer
// next render. Use when you want to kick the current buffer to make room for new data.
void ExecuteCommandBuffer(bool execute_off_thread, bool wait_for_completion = false);

protected:
std::unique_ptr<BoundingBox> CreateBoundingBox() const override;

private:
void CheckForSurfaceChange();
void CheckForSurfaceResize();
@@ -102,7 +101,6 @@ class Renderer : public ::Renderer
void BindFramebuffer(VKFramebuffer* fb);

std::unique_ptr<SwapChain> m_swap_chain;
std::unique_ptr<BoundingBox> m_bounding_box;

// Keep a copy of sampler states to avoid cache lookups every draw
std::array<SamplerState, NUM_PIXEL_SHADER_SAMPLERS> m_sampler_states = {};
@@ -255,7 +255,7 @@ static void BPWritten(const BPCmd& bp)
if (PE_copy.copy_to_xfb == 1)
{
// Make sure we disable Bounding box to match the side effects of the non-failure path
BoundingBox::Disable();
g_renderer->BBoxDisable();
}

return;
@@ -287,7 +287,7 @@ static void BPWritten(const BPCmd& bp)
// We should be able to get away with deactivating the current bbox tracking
// here. Not sure if there's a better spot to put this.
// the number of lines copied is determined by the y scale * source efb height
BoundingBox::Disable();
g_renderer->BBoxDisable();

float yScale;
if (PE_copy.scale_invert)
@@ -452,7 +452,7 @@ static void BPWritten(const BPCmd& bp)
case BPMEM_CLEARBBOX2:
{
const u8 offset = bp.address & 2;
BoundingBox::Enable();
g_renderer->BBoxEnable();

g_renderer->BBoxWrite(offset, bp.newvalue & 0x3ff);
g_renderer->BBoxWrite(offset + 1, bp.newvalue >> 10);
@@ -3,73 +3,115 @@

#include "VideoCommon/BoundingBox.h"

#include <algorithm>
#include <array>

#include "Common/Assert.h"
#include "Common/ChunkFile.h"
#include "Common/CommonTypes.h"
#include "VideoCommon/PixelShaderManager.h"
#include "VideoCommon/VideoConfig.h"

namespace BoundingBox
{
// File-local bounding box state.
namespace
{
// Whether or not bounding box is enabled.
bool s_is_active = false;

// Current bounding box coordinates.
// Indexed by the integer value of a Coordinate enumerator.
// NOTE(review): presumably ordered Left, Right, Top, Bottom to match the enum — confirm.
std::array<u16, 4> s_coordinates{
0x80,
0xA0,
0x80,
0xA0,
};
} // Anonymous namespace

void Enable()
void BoundingBox::Enable()
{
s_is_active = true;
PixelShaderManager::SetBoundingBoxActive(s_is_active);
m_is_active = true;
PixelShaderManager::SetBoundingBoxActive(m_is_active);
}

void Disable()
void BoundingBox::Disable()
{
s_is_active = false;
PixelShaderManager::SetBoundingBoxActive(s_is_active);
m_is_active = false;
PixelShaderManager::SetBoundingBoxActive(m_is_active);
}

bool IsEnabled()
void BoundingBox::Flush()
{
return s_is_active;
if (!g_ActiveConfig.backend_info.bSupportsBBox)
return;

m_is_valid = false;

if (std::none_of(m_dirty.begin(), m_dirty.end(), [](bool dirty) { return dirty; }))
return;

// TODO: Does this make any difference over just writing all the values?
// Games only ever seem to write all 4 values at once anyways.
for (u32 start = 0; start < NUM_BBOX_VALUES; ++start)
{
if (!m_dirty[start])
continue;

u32 end = start + 1;
while (end < NUM_BBOX_VALUES && m_dirty[end])
++end;

for (u32 i = start; i < end; ++i)
m_dirty[i] = false;

Write(start, std::vector<BBoxType>(m_values.begin() + start, m_values.begin() + end));
}
}

u16 GetCoordinate(Coordinate coordinate)
void BoundingBox::Readback()
{
return s_coordinates[static_cast<u32>(coordinate)];
if (!g_ActiveConfig.backend_info.bSupportsBBox)
return;

auto read_values = Read(0, NUM_BBOX_VALUES);

// Preserve dirty values, that way we don't need to sync.
for (u32 i = 0; i < NUM_BBOX_VALUES; i++)
{
if (!m_dirty[i])
m_values[i] = read_values[i];
}

m_is_valid = true;
}

void SetCoordinate(Coordinate coordinate, u16 value)
u16 BoundingBox::Get(u32 index)
{
s_coordinates[static_cast<u32>(coordinate)] = value;
ASSERT(index < NUM_BBOX_VALUES);

if (!m_is_valid)
Readback();

return static_cast<u16>(m_values[index]);
}

void Update(u16 left, u16 right, u16 top, u16 bottom)
void BoundingBox::Set(u32 index, u16 value)
{
const u16 new_left = std::min(left, GetCoordinate(Coordinate::Left));
const u16 new_right = std::max(right, GetCoordinate(Coordinate::Right));
const u16 new_top = std::min(top, GetCoordinate(Coordinate::Top));
const u16 new_bottom = std::max(bottom, GetCoordinate(Coordinate::Bottom));

SetCoordinate(Coordinate::Left, new_left);
SetCoordinate(Coordinate::Right, new_right);
SetCoordinate(Coordinate::Top, new_top);
SetCoordinate(Coordinate::Bottom, new_bottom);
ASSERT(index < NUM_BBOX_VALUES);

if (m_is_valid && m_values[index] == value)
return;

m_values[index] = value;
m_dirty[index] = true;
}

void DoState(PointerWrap& p)
// FIXME: This may not work correctly if we're in the middle of a draw.
// We should probably ensure that state saves only happen on frame boundaries.
// Nonetheless, it has been designed to be as safe as possible.
void BoundingBox::DoState(PointerWrap& p)
{
p.Do(s_is_active);
p.DoArray(s_coordinates);
}
p.Do(m_is_active);
p.DoArray(m_values);
p.DoArray(m_dirty);
p.Do(m_is_valid);

// We handle saving the backend values specially rather than using Readback() and Flush() so that
// we don't mess up the current cache state
std::vector<BBoxType> backend_values(NUM_BBOX_VALUES);
if (p.GetMode() == PointerWrap::MODE_READ)
{
p.Do(backend_values);

} // namespace BoundingBox
if (g_ActiveConfig.backend_info.bSupportsBBox)
Write(0, backend_values);
}
else
{
if (g_ActiveConfig.backend_info.bSupportsBBox)
backend_values = Read(0, NUM_BBOX_VALUES);

p.Do(backend_values);
}
}
@@ -3,40 +3,48 @@

#pragma once

#include <array>
#include <vector>

#include "Common/CommonTypes.h"

class PointerWrap;

// Bounding Box manager
namespace BoundingBox
{
// Indicates a coordinate of the bounding box.
enum class Coordinate
using BBoxType = s32;
constexpr u32 NUM_BBOX_VALUES = 4;

class BoundingBox
{
Left, // The X coordinate of the left side of the bounding box.
Right, // The X coordinate of the right side of the bounding box.
Top, // The Y coordinate of the top of the bounding box.
Bottom, // The Y coordinate of the bottom of the bounding box.
};
public:
explicit BoundingBox() = default;
virtual ~BoundingBox() = default;

// Enables bounding box.
void Enable();
bool IsEnabled() const { return m_is_active; }
void Enable();
void Disable();

// Disables bounding box.
void Disable();
void Flush();

// Determines if bounding box is enabled.
bool IsEnabled();
u16 Get(u32 index);
void Set(u32 index, u16 value);

// Gets a particular coordinate for the bounding box.
u16 GetCoordinate(Coordinate coordinate);
void DoState(PointerWrap& p);

// Sets a particular coordinate for the bounding box.
void SetCoordinate(Coordinate coordinate, u16 value);
// Initialize, Read, and Write are only safe to call if the backend supports bounding box,
// otherwise unexpected exceptions can occur
virtual bool Initialize() = 0;

// Updates all bounding box coordinates.
void Update(u16 left, u16 right, u16 top, u16 bottom);
protected:
virtual std::vector<BBoxType> Read(u32 index, u32 length) = 0;
// TODO: This can likely use std::span once we're on C++20
virtual void Write(u32 index, const std::vector<BBoxType>& values) = 0;

// Save state
void DoState(PointerWrap& p);
} // namespace BoundingBox
private:
void Readback();

bool m_is_active = false;

std::array<BBoxType, NUM_BBOX_VALUES> m_values = {};
std::array<bool, NUM_BBOX_VALUES> m_dirty = {};
bool m_is_valid = true;
};
@@ -18,6 +18,7 @@
#include "VideoCommon/BoundingBox.h"
#include "VideoCommon/Fifo.h"
#include "VideoCommon/PerfQueryBase.h"
#include "VideoCommon/RenderBase.h"
#include "VideoCommon/VideoBackendBase.h"

namespace PixelEngine
@@ -231,7 +232,7 @@ void RegisterMMIO(MMIO::Mapping* mmio, u32 base)
for (int i = 0; i < 4; ++i)
{
mmio->Register(base | (PE_BBOX_LEFT + 2 * i), MMIO::ComplexRead<u16>([i](u32) {
BoundingBox::Disable();
g_renderer->BBoxDisable();
return g_video_backend->Video_GetBoundingBox(i);
}),
MMIO::InvalidWrite<u16>());
@@ -14,6 +14,7 @@
#include "VideoCommon/DriverDetails.h"
#include "VideoCommon/LightingShaderGen.h"
#include "VideoCommon/NativeVertexFormat.h"
#include "VideoCommon/RenderBase.h"
#include "VideoCommon/RenderState.h"
#include "VideoCommon/VertexLoaderManager.h"
#include "VideoCommon/VideoCommon.h"
@@ -178,7 +179,7 @@ PixelShaderUid GetPixelShaderUid()
uid_data->genMode_numindstages = bpmem.genMode.numindstages;
uid_data->genMode_numtevstages = bpmem.genMode.numtevstages;
uid_data->genMode_numtexgens = bpmem.genMode.numtexgens;
uid_data->bounding_box = g_ActiveConfig.bBBoxEnable && BoundingBox::IsEnabled();
uid_data->bounding_box = g_ActiveConfig.bBBoxEnable && g_renderer->IsBBoxEnabled();
uid_data->rgba6_format =
bpmem.zcontrol.pixel_format == PixelFormat::RGBA6_Z24 && !g_ActiveConfig.bForceTrueColor;
uid_data->dither = bpmem.blendmode.dither && uid_data->rgba6_format;
@@ -57,6 +57,7 @@
#include "VideoCommon/AbstractTexture.h"
#include "VideoCommon/BPFunctions.h"
#include "VideoCommon/BPMemory.h"
#include "VideoCommon/BoundingBox.h"
#include "VideoCommon/CPMemory.h"
#include "VideoCommon/CommandProcessor.h"
#include "VideoCommon/FPSCounter.h"
@@ -124,6 +125,13 @@ bool Renderer::Initialize()
if (!m_post_processor->Initialize(m_backbuffer_format))
return false;

m_bounding_box = CreateBoundingBox();
if (g_ActiveConfig.backend_info.bSupportsBBox && !m_bounding_box->Initialize())
{
PanicAlertFmt("Failed to initialize bounding box.");
return false;
}

return true;
}

@@ -137,6 +145,7 @@ void Renderer::Shutdown()
ShutdownFrameDumping();
ShutdownImGui();
m_post_processor.reset();
m_bounding_box.reset();
}

void Renderer::BeginUtilityDrawing()
@@ -184,28 +193,46 @@ void Renderer::ReinterpretPixelData(EFBReinterpretType convtype)
g_framebuffer_manager->ReinterpretPixelData(convtype);
}

u16 Renderer::BBoxRead(int index)
bool Renderer::IsBBoxEnabled() const
{
return m_bounding_box->IsEnabled();
}

// Switches bounding box tracking on via the owned BoundingBox object.
void Renderer::BBoxEnable()
{
  auto& bbox = *m_bounding_box;
  bbox.Enable();
}

// Switches bounding box tracking off via the owned BoundingBox object.
void Renderer::BBoxDisable()
{
  auto& bbox = *m_bounding_box;
  bbox.Disable();
}

// Returns the bounding box value at `index`.
//
// When bounding box is disabled in the config or unsupported by the backend, the value comes
// from the CPU-side fallback array instead of the BoundingBox object.
u16 Renderer::BBoxRead(u32 index)
{
  if (!g_ActiveConfig.bBBoxEnable || !g_ActiveConfig.backend_info.bSupportsBBox)
    return m_bounding_box_fallback[index];

  return m_bounding_box->Get(index);
}

void Renderer::BBoxWrite(int index, u16 value)
void Renderer::BBoxWrite(u32 index, u16 value)
{
if (!g_ActiveConfig.bBBoxEnable || !g_ActiveConfig.backend_info.bSupportsBBox)
{
m_bounding_box_fallback[index] = value;
return;
}

BBoxWriteImpl(index, value);
m_bounding_box->Set(index, value);
}

// Flushes pending bounding box writes to the backend.
//
// Does nothing when bounding box is disabled in the config or unsupported by the backend.
void Renderer::BBoxFlush()
{
  if (!g_ActiveConfig.bBBoxEnable || !g_ActiveConfig.backend_info.bSupportsBBox)
    return;

  m_bounding_box->Flush();
}

u32 Renderer::AccessEFB(EFBAccessType type, u32 x, u32 y, u32 poke_data)
@@ -1761,6 +1788,8 @@ void Renderer::DoState(PointerWrap& p)
p.Do(m_last_xfb_height);
p.DoArray(m_bounding_box_fallback);

m_bounding_box->DoState(p);

if (p.GetMode() == PointerWrap::MODE_READ)
{
// Force the next xfb to be displayed.
@@ -38,6 +38,7 @@ class AbstractPipeline;
class AbstractShader;
class AbstractTexture;
class AbstractStagingTexture;
class BoundingBox;
class NativeVertexFormat;
class NetPlayChatUI;
class PointerWrap;
@@ -213,8 +214,11 @@ class Renderer
virtual u32 AccessEFB(EFBAccessType type, u32 x, u32 y, u32 poke_data);
virtual void PokeEFB(EFBAccessType type, const EfbPokeData* points, size_t num_points);

u16 BBoxRead(int index);
void BBoxWrite(int index, u16 value);
bool IsBBoxEnabled() const;
void BBoxEnable();
void BBoxDisable();
u16 BBoxRead(u32 index);
void BBoxWrite(u32 index, u16 value);
void BBoxFlush();

virtual void Flush() {}
@@ -303,9 +307,7 @@ class Renderer
// Should be called with the ImGui lock held.
void DrawImGui();

virtual u16 BBoxReadImpl(int index) = 0;
virtual void BBoxWriteImpl(int index, u16 value) = 0;
virtual void BBoxFlushImpl() {}
virtual std::unique_ptr<BoundingBox> CreateBoundingBox() const = 0;

AbstractFramebuffer* m_current_framebuffer = nullptr;
const AbstractPipeline* m_current_pipeline = nullptr;
@@ -396,6 +398,8 @@ class Renderer
u32 m_last_xfb_stride = 0;
u32 m_last_xfb_height = 0;

std::unique_ptr<BoundingBox> m_bounding_box;

// Nintendo's SDK seems to write "default" bounding box values before every draw (1023 0 1023 0
// are the only values encountered so far, which happen to be the extents allowed by the BP
// registers) to reset the registers for comparison in the pixel engine, and presumably to detect
@@ -269,7 +269,7 @@ void VertexManagerBase::CommitBuffer(u32 num_vertices, u32 vertex_stride, u32 nu
void VertexManagerBase::DrawCurrentBatch(u32 base_index, u32 num_indices, u32 base_vertex)
{
// If bounding box is enabled, we need to flush any changes first, then invalidate what we have.
if (BoundingBox::IsEnabled() && g_ActiveConfig.bBBoxEnable &&
if (g_renderer->IsBBoxEnabled() && g_ActiveConfig.bBBoxEnable &&
g_ActiveConfig.backend_info.bSupportsBBox)
{
g_renderer->BBoxFlush();
@@ -5,7 +5,6 @@

#include "Common/ChunkFile.h"
#include "VideoCommon/BPMemory.h"
#include "VideoCommon/BoundingBox.h"
#include "VideoCommon/CPMemory.h"
#include "VideoCommon/CommandProcessor.h"
#include "VideoCommon/Fifo.h"
@@ -71,9 +70,6 @@ void VideoCommon_DoState(PointerWrap& p)
g_vertex_manager->DoState(p);
p.DoMarker("VertexManager");

BoundingBox::DoState(p);
p.DoMarker("BoundingBox");

g_framebuffer_manager->DoState(p);
p.DoMarker("FramebufferManager");