Large diffs are not rendered by default.

@@ -11,12 +11,11 @@
#include "Common/GL/GLExtensions/GLExtensions.h"
#include "VideoCommon/RenderBase.h"

struct XFBSourceBase;

namespace OGL
{
class OGLFramebuffer;
class OGLPipeline;
void ClearEFBCache();
class OGLTexture;

enum GlslVersion
{
@@ -86,6 +85,8 @@ class Renderer : public ::Renderer
Renderer(std::unique_ptr<GLContext> main_gl_context, float backbuffer_scale);
~Renderer() override;

// Returns the global g_renderer downcast to the OGL Renderer type. The static_cast is unchecked,
// so the result is only meaningful while g_renderer actually holds an OGL::Renderer.
static Renderer* GetInstance() { return static_cast<Renderer*>(g_renderer.get()); }

bool IsHeadless() const override;

bool Initialize() override;
@@ -98,73 +99,80 @@ class Renderer : public ::Renderer
size_t length) override;
std::unique_ptr<AbstractShader> CreateShaderFromBinary(ShaderStage stage, const void* data,
size_t length) override;
std::unique_ptr<NativeVertexFormat>
CreateNativeVertexFormat(const PortableVertexDeclaration& vtx_decl) override;
std::unique_ptr<AbstractPipeline> CreatePipeline(const AbstractPipelineConfig& config) override;
std::unique_ptr<AbstractFramebuffer>
CreateFramebuffer(const AbstractTexture* color_attachment,
const AbstractTexture* depth_attachment) override;
CreateFramebuffer(AbstractTexture* color_attachment, AbstractTexture* depth_attachment) override;

void SetPipeline(const AbstractPipeline* pipeline) override;
void SetFramebuffer(const AbstractFramebuffer* framebuffer) override;
void SetAndDiscardFramebuffer(const AbstractFramebuffer* framebuffer) override;
void SetAndClearFramebuffer(const AbstractFramebuffer* framebuffer,
const ClearColor& color_value = {},
void SetFramebuffer(AbstractFramebuffer* framebuffer) override;
void SetAndDiscardFramebuffer(AbstractFramebuffer* framebuffer) override;
void SetAndClearFramebuffer(AbstractFramebuffer* framebuffer, const ClearColor& color_value = {},
float depth_value = 0.0f) override;
void SetScissorRect(const MathUtil::Rectangle<int>& rc) override;
void SetTexture(u32 index, const AbstractTexture* texture) override;
void SetSamplerState(u32 index, const SamplerState& state) override;
void SetComputeImageTexture(AbstractTexture* texture, bool read, bool write) override;
void UnbindTexture(const AbstractTexture* texture) override;
void SetInterlacingMode() override;
void SetViewport(float x, float y, float width, float height, float near_depth,
float far_depth) override;
void Draw(u32 base_vertex, u32 num_vertices) override;
void DrawIndexed(u32 base_index, u32 num_indices, u32 base_vertex) override;
void DispatchComputeShader(const AbstractShader* shader, u32 groups_x, u32 groups_y,
u32 groups_z) override;
void BindBackbuffer(const ClearColor& clear_color = {}) override;
void PresentBackbuffer() override;

u32 AccessEFB(EFBAccessType type, u32 x, u32 y, u32 poke_data) override;
void PokeEFB(EFBAccessType type, const EfbPokeData* points, size_t num_points) override;

u16 BBoxRead(int index) override;
void BBoxWrite(int index, u16 value) override;

void ResetAPIState() override;
void RestoreAPIState() override;

TargetRectangle ConvertEFBRectangle(const EFBRectangle& rc) override;
void BeginUtilityDrawing() override;
void EndUtilityDrawing() override;

void Flush() override;
void WaitForGPUIdle() override;
void RenderXFBToScreen(const AbstractTexture* texture, const EFBRectangle& rc) override;
void OnConfigChanged(u32 bits) override;

void ClearScreen(const EFBRectangle& rc, bool colorEnable, bool alphaEnable, bool zEnable,
u32 color, u32 z) override;

void ReinterpretPixelData(unsigned int convtype) override;

std::unique_ptr<VideoCommon::AsyncShaderCompiler> CreateAsyncShaderCompiler() override;

// Only call methods from this on the GPU thread.
GLContext* GetMainGLContext() const { return m_main_gl_context.get(); }
bool IsGLES() const { return m_main_gl_context->IsGLES(); }

const OGLPipeline* GetCurrentGraphicsPipeline() const { return m_graphics_pipeline; }
// Invalidates a cached texture binding. Required for texel buffers when they borrow the units.
void InvalidateTextureBinding(u32 index) { m_bound_textures[index] = nullptr; }

private:
void UpdateEFBCache(EFBAccessType type, u32 cacheRectIdx, const EFBRectangle& efbPixelRc,
const TargetRectangle& targetPixelRc, const void* data);
// The shared framebuffer exists for copying textures when extensions are not available. It is
// slower, but the only way to do these things otherwise.
GLuint GetSharedReadFramebuffer() const { return m_shared_read_framebuffer; }
GLuint GetSharedDrawFramebuffer() const { return m_shared_draw_framebuffer; }
void BindSharedReadFramebuffer();
void BindSharedDrawFramebuffer();

// Restores FBO binding after it's been changed.
void RestoreFramebufferBinding();

private:
void CheckForSurfaceChange();
void CheckForSurfaceResize();

void ApplyBlendingState(const BlendingState state, bool force = false);
void ApplyRasterizationState(const RasterizationState state, bool force = false);
void ApplyDepthState(const DepthState state, bool force = false);
void ApplyRasterizationState(const RasterizationState state);
void ApplyDepthState(const DepthState state);
void ApplyBlendingState(const BlendingState state);

std::unique_ptr<GLContext> m_main_gl_context;
std::array<const AbstractTexture*, 8> m_bound_textures{};
const OGLPipeline* m_graphics_pipeline = nullptr;
std::unique_ptr<OGLFramebuffer> m_system_framebuffer;
std::array<const OGLTexture*, 8> m_bound_textures{};
AbstractTexture* m_bound_image_texture = nullptr;
RasterizationState m_current_rasterization_state;
DepthState m_current_depth_state;
BlendingState m_current_blend_state;
GLuint m_shared_read_framebuffer = 0;
GLuint m_shared_draw_framebuffer = 0;
};
} // namespace OGL
@@ -19,6 +19,8 @@ class StreamBuffer
static std::unique_ptr<StreamBuffer> Create(u32 type, u32 size);
virtual ~StreamBuffer();

u32 GetGLBufferId() const { return m_buffer; }
u32 GetSize() const { return m_size; }
u32 GetCurrentOffset() const { return m_iterator; }

/* This mapping function will return a pair of:
@@ -64,4 +66,4 @@ class StreamBuffer

std::array<GLsync, SYNC_POINTS> m_fences{};
};
}
} // namespace OGL

This file was deleted.

This file was deleted.

This file was deleted.

This file was deleted.

@@ -9,17 +9,14 @@
#include <string>
#include <vector>

#include "Common/Align.h"
#include "Common/CommonTypes.h"
#include "Common/FileUtil.h"
#include "Common/GL/GLExtensions/GLExtensions.h"
#include "Common/StringUtil.h"

#include "VideoBackends/OGL/BoundingBox.h"
#include "VideoBackends/OGL/OGLPipeline.h"
#include "VideoBackends/OGL/ProgramShaderCache.h"
#include "VideoBackends/OGL/Render.h"
#include "VideoBackends/OGL/StreamBuffer.h"
#include "VideoCommon/BoundingBox.h"

#include "VideoCommon/IndexGenerator.h"
#include "VideoCommon/Statistics.h"
@@ -28,79 +25,147 @@

namespace OGL
{
// These are the initially requested sizes for the buffers, expressed in bytes.
const u32 MAX_IBUFFER_SIZE = 2 * 1024 * 1024;
const u32 MAX_VBUFFER_SIZE = 32 * 1024 * 1024;

VertexManager::VertexManager() : m_cpu_v_buffer(MAX_VBUFFER_SIZE), m_cpu_i_buffer(MAX_IBUFFER_SIZE)
// Makes sure a vertex format is bound: when ProgramShaderCache reports that no valid vertex
// format is bound, the format currently selected by VertexLoaderManager is bound.
static void CheckBufferBinding()
{
// NOTE(review): CreateDeviceObjects() is declared as a VertexManager member elsewhere in this
// diff and this is a free function; this call looks like residue from the old constructor
// body -- confirm it does not belong here.
CreateDeviceObjects();
// The index buffer is part of the VAO state, therefore we need to bind it first.
if (!ProgramShaderCache::IsValidVertexFormatBound())
{
ProgramShaderCache::BindVertexFormat(
static_cast<GLVertexFormat*>(VertexLoaderManager::GetCurrentVertexFormat()));
}
}

VertexManager::VertexManager() = default;

VertexManager::~VertexManager()
{
DestroyDeviceObjects();
}
if (g_ActiveConfig.backend_info.bSupportsPaletteConversion)
{
glDeleteTextures(static_cast<GLsizei>(m_texel_buffer_views.size()),
m_texel_buffer_views.data());
}

void VertexManager::CreateDeviceObjects()
{
m_vertex_buffer = StreamBuffer::Create(GL_ARRAY_BUFFER, MAX_VBUFFER_SIZE);
m_index_buffer = StreamBuffer::Create(GL_ELEMENT_ARRAY_BUFFER, MAX_IBUFFER_SIZE);
// VAO must be found when destroying the index buffer.
CheckBufferBinding();
m_texel_buffer.reset();
m_index_buffer.reset();
m_vertex_buffer.reset();
}

void VertexManager::DestroyDeviceObjects()
// Creates the GL stream buffers owned by this vertex manager: the vertex and index buffers
// always, and the texel buffer plus one texture view per texel buffer format when the backend
// reports palette conversion support.
// Returns false if VertexManagerBase::Initialize() fails, true otherwise.
bool VertexManager::Initialize()
{
// NOTE(review): these two resets run before the base-class check and look like residue from
// the old DestroyDeviceObjects() body -- confirm they belong here.
m_vertex_buffer.reset();
m_index_buffer.reset();
if (!VertexManagerBase::Initialize())
return false;

m_vertex_buffer = StreamBuffer::Create(GL_ARRAY_BUFFER, VERTEX_STREAM_BUFFER_SIZE);
m_index_buffer = StreamBuffer::Create(GL_ELEMENT_ARRAY_BUFFER, INDEX_STREAM_BUFFER_SIZE);

if (g_ActiveConfig.backend_info.bSupportsPaletteConversion)
{
// The minimum MAX_TEXTURE_BUFFER_SIZE that the spec mandates is 65KB, we are asking for a 1MB
// buffer here. This buffer is also used as storage for undecoded textures when compute shader
// texture decoding is enabled, in which case the requested size is 32MB.
GLint max_buffer_size;
glGetIntegerv(GL_MAX_TEXTURE_BUFFER_SIZE, &max_buffer_size);
// Never request more than the limit the driver just reported.
m_texel_buffer = StreamBuffer::Create(
GL_TEXTURE_BUFFER, std::min(max_buffer_size, static_cast<GLint>(TEXEL_STREAM_BUFFER_SIZE)));

// Allocate texture views backed by buffer.
// Each entry pairs a texel buffer format (used as the index into m_texel_buffer_views) with the
// GL internal format passed to glTexBuffer for that view.
static constexpr std::array<std::pair<TexelBufferFormat, GLenum>, NUM_TEXEL_BUFFER_FORMATS>
format_mapping = {{
{TEXEL_BUFFER_FORMAT_R8_UINT, GL_R8UI},
{TEXEL_BUFFER_FORMAT_R16_UINT, GL_R16UI},
{TEXEL_BUFFER_FORMAT_RGBA8_UINT, GL_RGBA8},
{TEXEL_BUFFER_FORMAT_R32G32_UINT, GL_RG32UI},
}};
glGenTextures(static_cast<GLsizei>(m_texel_buffer_views.size()), m_texel_buffer_views.data());
glActiveTexture(GL_MUTABLE_TEXTURE_INDEX);
for (const auto& it : format_mapping)
{
// Every view is attached to the same underlying texel stream buffer object.
glBindTexture(GL_TEXTURE_BUFFER, m_texel_buffer_views[it.first]);
glTexBuffer(GL_TEXTURE_BUFFER, it.second, m_texel_buffer->GetGLBufferId());
}
}

return true;
}

// Uploads the caller-supplied uniform data (uniforms_size bytes starting at uniforms) through
// ProgramShaderCache, after invalidating the constants state.
void VertexManager::UploadUtilityUniforms(const void* uniforms, u32 uniforms_size)
{
// NOTE(review): both the ProgramShaderCache-qualified and the unqualified InvalidateConstants()
// are called here; one of them may be diff residue -- confirm which is intended.
ProgramShaderCache::InvalidateConstants();
InvalidateConstants();
ProgramShaderCache::UploadConstants(uniforms, uniforms_size);
}

GLuint VertexManager::GetVertexBufferHandle() const
// Streams data_size bytes from data into the texel buffer and binds the texture view matching
// format to texture unit 0.
// On success *out_offset receives the location of the data expressed in elements of format
// (byte offset divided by the element size) and true is returned.
// Returns false when data_size is larger than the whole texel buffer.
bool VertexManager::UploadTexelBuffer(const void* data, u32 data_size, TexelBufferFormat format,
u32* out_offset)
{
// NOTE(review): this return makes everything below unreachable and returns a buffer handle from
// a bool function; it looks like residue from the old GetVertexBufferHandle() body -- confirm.
return m_vertex_buffer->m_buffer;
if (data_size > m_texel_buffer->GetSize())
return false;

const u32 elem_size = GetTexelBufferElementSize(format);
// dst.first is used as the write pointer, dst.second as the byte offset of the mapping.
const auto dst = m_texel_buffer->Map(data_size, elem_size);
std::memcpy(dst.first, data, data_size);
ADDSTAT(stats.thisFrame.bytesUniformStreamed, data_size);
*out_offset = dst.second / elem_size;
m_texel_buffer->Unmap(data_size);

// Bind the correct view to the texel buffer slot.
glActiveTexture(GL_TEXTURE0);
glBindTexture(GL_TEXTURE_BUFFER, m_texel_buffer_views[static_cast<u32>(format)]);
// Unit 0 was rebound behind the renderer's back, so drop its cached binding for that unit.
Renderer::GetInstance()->InvalidateTextureBinding(0);
return true;
}

GLuint VertexManager::GetIndexBufferHandle() const
// Streams a data block followed by a palette block into the texel buffer in a single mapping,
// then binds the view for format to texture unit 0 and the view for palette_format to unit 1.
// On success *out_offset and *out_palette_offset receive the locations of the two blocks, each
// expressed in elements of its own format, and true is returned.
// Returns false when the reserved size exceeds the whole texel buffer.
bool VertexManager::UploadTexelBuffer(const void* data, u32 data_size, TexelBufferFormat format,
u32* out_offset, const void* palette_data, u32 palette_size,
TexelBufferFormat palette_format, u32* out_palette_offset)
{
// NOTE(review): this return makes everything below unreachable and returns a buffer handle from
// a bool function; it looks like residue from the old GetIndexBufferHandle() body -- confirm.
return m_index_buffer->m_buffer;
const u32 elem_size = GetTexelBufferElementSize(format);
const u32 palette_elem_size = GetTexelBufferElementSize(palette_format);
// The extra palette_elem_size bytes presumably leave room for the alignment padding inserted
// between the data and the palette below -- confirm.
const u32 reserve_size = data_size + palette_size + palette_elem_size;
if (reserve_size > m_texel_buffer->GetSize())
return false;

const auto dst = m_texel_buffer->Map(reserve_size, elem_size);
// The palette starts at the first multiple of its element size at or after the end of the data.
const u32 palette_byte_offset = Common::AlignUp(data_size, palette_elem_size);
std::memcpy(dst.first, data, data_size);
std::memcpy(dst.first + palette_byte_offset, palette_data, palette_size);
ADDSTAT(stats.thisFrame.bytesUniformStreamed, palette_byte_offset + palette_size);
*out_offset = dst.second / elem_size;
*out_palette_offset = (dst.second + palette_byte_offset) / palette_elem_size;
// Only the bytes actually written are committed, not the full reservation.
m_texel_buffer->Unmap(palette_byte_offset + palette_size);

glActiveTexture(GL_TEXTURE0);
glBindTexture(GL_TEXTURE_BUFFER, m_texel_buffer_views[static_cast<u32>(format)]);
Renderer::GetInstance()->InvalidateTextureBinding(0);

glActiveTexture(GL_TEXTURE1);
glBindTexture(GL_TEXTURE_BUFFER, m_texel_buffer_views[static_cast<u32>(palette_format)]);
Renderer::GetInstance()->InvalidateTextureBinding(1);

return true;
}

static void CheckBufferBinding()
GLuint VertexManager::GetVertexBufferHandle() const
{
// The index buffer is part of the VAO state, therefore we need to bind it first.
if (!ProgramShaderCache::IsValidVertexFormatBound())
{
ProgramShaderCache::BindVertexFormat(
static_cast<GLVertexFormat*>(VertexLoaderManager::GetCurrentVertexFormat()));
}
return m_vertex_buffer->m_buffer;
}

void VertexManager::ResetBuffer(u32 vertex_stride, bool cull_all)
GLuint VertexManager::GetIndexBufferHandle() const
{
if (cull_all)
{
// This buffer isn't getting sent to the GPU. Just allocate it on the cpu.
m_cur_buffer_pointer = m_base_buffer_pointer = m_cpu_v_buffer.data();
m_end_buffer_pointer = m_base_buffer_pointer + m_cpu_v_buffer.size();
return m_index_buffer->m_buffer;
}

IndexGenerator::Start((u16*)m_cpu_i_buffer.data());
}
else
{
CheckBufferBinding();
void VertexManager::ResetBuffer(u32 vertex_stride)
{
CheckBufferBinding();

auto buffer = m_vertex_buffer->Map(MAXVBUFFERSIZE, vertex_stride);
m_cur_buffer_pointer = m_base_buffer_pointer = buffer.first;
m_end_buffer_pointer = buffer.first + MAXVBUFFERSIZE;
auto buffer = m_vertex_buffer->Map(MAXVBUFFERSIZE, vertex_stride);
m_cur_buffer_pointer = m_base_buffer_pointer = buffer.first;
m_end_buffer_pointer = buffer.first + MAXVBUFFERSIZE;

buffer = m_index_buffer->Map(MAXIBUFFERSIZE * sizeof(u16));
IndexGenerator::Start((u16*)buffer.first);
}
buffer = m_index_buffer->Map(MAXIBUFFERSIZE * sizeof(u16));
IndexGenerator::Start((u16*)buffer.first);
}

void VertexManager::CommitBuffer(u32 num_vertices, u32 vertex_stride, u32 num_indices,
@@ -120,31 +185,8 @@ void VertexManager::CommitBuffer(u32 num_vertices, u32 vertex_stride, u32 num_in
ADDSTAT(stats.thisFrame.bytesIndexStreamed, index_data_size);
}

void VertexManager::UploadConstants()
void VertexManager::UploadUniforms()
{
ProgramShaderCache::UploadConstants();
}

// Issues the indexed draw for the current batch using the currently selected pipeline object.
// While bounding box is active and the fragment shader implementation is not in use, the
// stencil test is enabled around the draw and BoundingBox is told the stencil was updated.
// Afterwards the save-target id is bumped and the EFB cache is cleared.
void VertexManager::DrawCurrentBatch(u32 base_index, u32 num_indices, u32 base_vertex)
{
  // Evaluated separately before and after the draw, exactly like two independent checks.
  const auto stencil_bbox_in_use = [] {
    return ::BoundingBox::active && !g_Config.BBoxUseFragmentShaderImplementation();
  };

  if (stencil_bbox_in_use())
    glEnable(GL_STENCIL_TEST);

  // Without a pipeline object there is nothing to draw with.
  if (m_current_pipeline_object)
  {
    auto* const ogl_renderer = static_cast<Renderer*>(g_renderer.get());
    ogl_renderer->SetPipeline(m_current_pipeline_object);
    ogl_renderer->DrawIndexed(base_index, num_indices, base_vertex);
  }

  if (stencil_bbox_in_use())
  {
    OGL::BoundingBox::StencilWasUpdated();
    glDisable(GL_STENCIL_TEST);
  }

  g_Config.iSaveTargetId++;
  ClearEFBCache();
}
} // namespace OGL
@@ -4,8 +4,8 @@

#pragma once

#include <array>
#include <memory>
#include <vector>

#include "Common/CommonTypes.h"
#include "Common/GL/GLUtil.h"
@@ -26,35 +26,34 @@ class GLVertexFormat : public NativeVertexFormat

// Handles the OpenGL details of drawing lots of vertices quickly.
// Other functionality is moving out.
class VertexManager : public VertexManagerBase
class VertexManager final : public VertexManagerBase
{
public:
VertexManager();
~VertexManager();
~VertexManager() override;

std::unique_ptr<NativeVertexFormat>
CreateNativeVertexFormat(const PortableVertexDeclaration& vtx_decl) override;
bool Initialize() override;

void UploadUtilityUniforms(const void* uniforms, u32 uniforms_size) override;
bool UploadTexelBuffer(const void* data, u32 data_size, TexelBufferFormat format,
u32* out_offset) override;
bool UploadTexelBuffer(const void* data, u32 data_size, TexelBufferFormat format, u32* out_offset,
const void* palette_data, u32 palette_size,
TexelBufferFormat palette_format, u32* out_palette_offset) override;

GLuint GetVertexBufferHandle() const;
GLuint GetIndexBufferHandle() const;

protected:
void CreateDeviceObjects() override;
void DestroyDeviceObjects() override;
void ResetBuffer(u32 vertex_stride, bool cull_all) override;
void ResetBuffer(u32 vertex_stride) override;
void CommitBuffer(u32 num_vertices, u32 vertex_stride, u32 num_indices, u32* out_base_vertex,
u32* out_base_index) override;
void UploadConstants() override;
void DrawCurrentBatch(u32 base_index, u32 num_indices, u32 base_vertex) override;
void UploadUniforms() override;

private:
std::unique_ptr<StreamBuffer> m_vertex_buffer;
std::unique_ptr<StreamBuffer> m_index_buffer;

// Alternative buffers in CPU memory for primitives we are going to discard.
std::vector<u8> m_cpu_v_buffer;
std::vector<u16> m_cpu_i_buffer;
std::unique_ptr<StreamBuffer> m_texel_buffer;
std::array<GLuint, NUM_TEXEL_BUFFER_FORMATS> m_texel_buffer_views{};
};
} // namespace OGL
@@ -50,13 +50,11 @@ Make AA apply instantly during gameplay if possible
#include "VideoBackends/OGL/ProgramShaderCache.h"
#include "VideoBackends/OGL/Render.h"
#include "VideoBackends/OGL/SamplerCache.h"
#include "VideoBackends/OGL/TextureCache.h"
#include "VideoBackends/OGL/TextureConverter.h"
#include "VideoBackends/OGL/VertexManager.h"
#include "VideoBackends/OGL/VideoBackend.h"

#include "VideoCommon/OnScreenDisplay.h"
#include "VideoCommon/VideoCommon.h"
#include "VideoCommon/FramebufferManager.h"
#include "VideoCommon/TextureCacheBase.h"
#include "VideoCommon/VideoConfig.h"

namespace OGL
@@ -78,6 +76,7 @@ void VideoBackend::InitBackendInfo()
{
g_Config.backend_info.api_type = APIType::OpenGL;
g_Config.backend_info.MaxTextureSize = 16384;
g_Config.backend_info.bUsesLowerLeftOrigin = true;
g_Config.backend_info.bSupportsExclusiveFullscreen = false;
g_Config.backend_info.bSupportsOversizedViewports = true;
g_Config.backend_info.bSupportsGeometryShaders = true;
@@ -89,6 +88,7 @@ void VideoBackend::InitBackendInfo()
g_Config.backend_info.bSupportsLogicOp = true;
g_Config.backend_info.bSupportsMultithreading = false;
g_Config.backend_info.bSupportsCopyToVram = true;
g_Config.backend_info.bSupportsLargePoints = true;

// TODO: There is a bug here, if texel buffers are not supported the graphics options
// will show the option when it is not supported. The only way around this would be
@@ -173,31 +173,40 @@ bool VideoBackend::Initialize(const WindowSystemInfo& wsi)
return false;

g_renderer = std::make_unique<Renderer>(std::move(main_gl_context), wsi.render_surface_scale);
ProgramShaderCache::Init();
g_vertex_manager = std::make_unique<VertexManager>();
g_shader_cache = std::make_unique<VideoCommon::ShaderCache>();
g_framebuffer_manager = std::make_unique<FramebufferManager>();
g_perf_query = GetPerfQuery();
ProgramShaderCache::Init();
g_texture_cache = std::make_unique<TextureCache>();
g_texture_cache = std::make_unique<TextureCacheBase>();
g_sampler_cache = std::make_unique<SamplerCache>();
g_shader_cache = std::make_unique<VideoCommon::ShaderCache>();
if (!g_renderer->Initialize())
BoundingBox::Init();

if (!g_vertex_manager->Initialize() || !g_shader_cache->Initialize() ||
!g_renderer->Initialize() || !g_framebuffer_manager->Initialize() ||
!g_texture_cache->Initialize())
{
PanicAlert("Failed to initialize renderer classes");
Shutdown();
return false;
TextureConverter::Init();
BoundingBox::Init(g_renderer->GetTargetWidth(), g_renderer->GetTargetHeight());
return g_shader_cache->Initialize();
}

g_shader_cache->InitializeShaderCache();
return true;
}

void VideoBackend::Shutdown()
{
g_shader_cache->Shutdown();
g_renderer->Shutdown();
BoundingBox::Shutdown();
TextureConverter::Shutdown();
g_shader_cache.reset();
g_sampler_cache.reset();
g_texture_cache.reset();
ProgramShaderCache::Shutdown();
g_perf_query.reset();
g_vertex_manager.reset();
g_framebuffer_manager.reset();
g_shader_cache.reset();
ProgramShaderCache::Shutdown();
g_renderer.reset();
ShutdownShared();
}
@@ -19,11 +19,15 @@

#include "VideoCommon/AbstractPipeline.h"
#include "VideoCommon/AbstractShader.h"
#include "VideoCommon/AbstractTexture.h"
#include "VideoCommon/BoundingBox.h"
#include "VideoCommon/NativeVertexFormat.h"
#include "VideoCommon/OnScreenDisplay.h"
#include "VideoCommon/VideoBackendBase.h"
#include "VideoCommon/VideoConfig.h"

namespace SW
{
SWRenderer::SWRenderer(std::unique_ptr<SWOGLWindow> window)
: ::Renderer(static_cast<int>(MAX_XFB_WIDTH), static_cast<int>(MAX_XFB_HEIGHT), 1.0f,
AbstractTextureFormat::RGBA8),
@@ -38,21 +42,20 @@ bool SWRenderer::IsHeadless() const

std::unique_ptr<AbstractTexture> SWRenderer::CreateTexture(const TextureConfig& config)
{
return std::make_unique<SW::SWTexture>(config);
return std::make_unique<SWTexture>(config);
}

std::unique_ptr<AbstractStagingTexture>
SWRenderer::CreateStagingTexture(StagingTextureType type, const TextureConfig& config)
{
return std::make_unique<SW::SWStagingTexture>(type, config);
return std::make_unique<SWStagingTexture>(type, config);
}

std::unique_ptr<AbstractFramebuffer>
SWRenderer::CreateFramebuffer(const AbstractTexture* color_attachment,
const AbstractTexture* depth_attachment)
SWRenderer::CreateFramebuffer(AbstractTexture* color_attachment, AbstractTexture* depth_attachment)
{
return SW::SWFramebuffer::Create(static_cast<const SW::SWTexture*>(color_attachment),
static_cast<const SW::SWTexture*>(depth_attachment));
return SWFramebuffer::Create(static_cast<SWTexture*>(color_attachment),
static_cast<SWTexture*>(depth_attachment));
}

class SWShader final : public AbstractShader
@@ -132,18 +135,15 @@ void SWRenderer::BBoxWrite(int index, u16 value)
BoundingBox::coords[index] = value;
}

// Converts an EFB rectangle to a target rectangle. The four edges are copied unchanged, so
// no scaling is applied.
TargetRectangle SWRenderer::ConvertEFBRectangle(const EFBRectangle& rc)
{
  TargetRectangle target;

  // Horizontal extent.
  target.left = rc.left;
  target.right = rc.right;

  // Vertical extent.
  target.top = rc.top;
  target.bottom = rc.bottom;

  return target;
}

void SWRenderer::ClearScreen(const EFBRectangle& rc, bool colorEnable, bool alphaEnable,
bool zEnable, u32 color, u32 z)
{
EfbCopy::ClearEfb();
}

std::unique_ptr<NativeVertexFormat>
SWRenderer::CreateNativeVertexFormat(const PortableVertexDeclaration& vtx_decl)
{
return std::make_unique<NativeVertexFormat>(vtx_decl);
}
} // namespace SW
@@ -12,7 +12,9 @@

class SWOGLWindow;

class SWRenderer : public Renderer
namespace SW
{
class SWRenderer final : public Renderer
{
public:
SWRenderer(std::unique_ptr<SWOGLWindow> window);
@@ -23,29 +25,33 @@ class SWRenderer : public Renderer
std::unique_ptr<AbstractStagingTexture>
CreateStagingTexture(StagingTextureType type, const TextureConfig& config) override;
std::unique_ptr<AbstractFramebuffer>
CreateFramebuffer(const AbstractTexture* color_attachment,
const AbstractTexture* depth_attachment) override;
CreateFramebuffer(AbstractTexture* color_attachment, AbstractTexture* depth_attachment) override;

std::unique_ptr<AbstractShader> CreateShaderFromSource(ShaderStage stage, const char* source,
size_t length) override;
std::unique_ptr<AbstractShader> CreateShaderFromBinary(ShaderStage stage, const void* data,
size_t length) override;
std::unique_ptr<NativeVertexFormat>
CreateNativeVertexFormat(const PortableVertexDeclaration& vtx_decl) override;
std::unique_ptr<AbstractPipeline> CreatePipeline(const AbstractPipelineConfig& config) override;

u32 AccessEFB(EFBAccessType type, u32 x, u32 y, u32 poke_data) override;
void PokeEFB(EFBAccessType type, const EfbPokeData* points, size_t num_points) override {}
u16 BBoxRead(int index) override;
void BBoxWrite(int index, u16 value) override;

TargetRectangle ConvertEFBRectangle(const EFBRectangle& rc) override;

void RenderXFBToScreen(const AbstractTexture* texture, const EFBRectangle& rc) override;

void ClearScreen(const EFBRectangle& rc, bool colorEnable, bool alphaEnable, bool zEnable,
u32 color, u32 z) override;

void ReinterpretPixelData(unsigned int convtype) override {}
void ReinterpretPixelData(EFBReinterpretType convtype) override {}

void ScaleTexture(AbstractFramebuffer* dst_framebuffer, const MathUtil::Rectangle<int>& dst_rect,
const AbstractTexture* src_texture,
const MathUtil::Rectangle<int>& src_rect) override;

private:
std::unique_ptr<SWOGLWindow> m_window;
};
} // namespace SW
@@ -3,6 +3,7 @@
// Refer to the license.txt file included.

#include "VideoBackends/Software/SWTexture.h"
#include "VideoBackends/Software/SWRenderer.h"

#include <cstring>
#include "Common/Assert.h"
@@ -45,6 +46,25 @@ void CopyTextureData(const TextureConfig& src_config, const u8* src_ptr, u32 src
dst_ptr += dst_stride;
}
}
} // namespace

// Scales the contents of src_texture into the color attachment of dst_framebuffer.
// The source texture data is copied into a temporary pixel buffer, CopyRegion() maps it from
// src_rect onto dst_rect in a second temporary buffer, and that buffer is then copied into the
// destination texture's data.
// NOTE(review): width * height * 4 is used both as the element count of the Pixel vectors and as
// the memcpy byte count -- confirm this matches sizeof(Pixel) and the texture data sizes.
void SWRenderer::ScaleTexture(AbstractFramebuffer* dst_framebuffer,
                              const MathUtil::Rectangle<int>& dst_rect,
                              const AbstractTexture* src_texture,
                              const MathUtil::Rectangle<int>& src_rect)
{
  const auto* const sw_source = static_cast<const SWTexture*>(src_texture);
  auto* const sw_destination = static_cast<SWTexture*>(dst_framebuffer->GetColorAttachment());

  // Staging copy of the source texture data.
  std::vector<Pixel> staged_source(src_rect.GetHeight() * src_rect.GetWidth() * 4);
  memcpy(staged_source.data(), sw_source->GetData(), staged_source.size());

  // Scratch buffer that receives the scaled result.
  std::vector<Pixel> staged_destination(dst_rect.GetHeight() * dst_rect.GetWidth() * 4);
  CopyRegion(staged_source.data(), src_rect, staged_destination.data(), dst_rect);

  memcpy(sw_destination->GetData(), staged_destination.data(), staged_destination.size());
}

SWTexture::SWTexture(const TextureConfig& tex_config) : AbstractTexture(tex_config)
@@ -62,30 +82,6 @@ void SWTexture::CopyRectangleFromTexture(const AbstractTexture* src,
src_rect.left, src_rect.top, src_rect.GetWidth(), src_rect.GetHeight(), m_config,
m_data.data(), dst_rect.left, dst_rect.top);
}
void SWTexture::ScaleRectangleFromTexture(const AbstractTexture* source,
const MathUtil::Rectangle<int>& srcrect,
const MathUtil::Rectangle<int>& dstrect)
{
const SWTexture* software_source_texture = static_cast<const SWTexture*>(source);

if (srcrect.GetWidth() == dstrect.GetWidth() && srcrect.GetHeight() == dstrect.GetHeight())
{
m_data.assign(software_source_texture->GetData(),
software_source_texture->GetData() + m_data.size());
}
else
{
std::vector<Pixel> source_pixels;
source_pixels.resize(srcrect.GetHeight() * srcrect.GetWidth() * 4);
memcpy(source_pixels.data(), software_source_texture->GetData(), source_pixels.size());

std::vector<Pixel> destination_pixels;
destination_pixels.resize(dstrect.GetHeight() * dstrect.GetWidth() * 4);

CopyRegion(source_pixels.data(), srcrect, destination_pixels.data(), dstrect);
memcpy(GetData(), destination_pixels.data(), destination_pixels.size());
}
}
void SWTexture::ResolveFromTexture(const AbstractTexture* src, const MathUtil::Rectangle<int>& rect,
u32 layer, u32 level)
{
@@ -153,14 +149,16 @@ void SWStagingTexture::Flush()
m_needs_flush = false;
}

SWFramebuffer::SWFramebuffer(AbstractTextureFormat color_format, AbstractTextureFormat depth_format,
SWFramebuffer::SWFramebuffer(AbstractTexture* color_attachment, AbstractTexture* depth_attachment,
AbstractTextureFormat color_format, AbstractTextureFormat depth_format,
u32 width, u32 height, u32 layers, u32 samples)
: AbstractFramebuffer(color_format, depth_format, width, height, layers, samples)
: AbstractFramebuffer(color_attachment, depth_attachment, color_format, depth_format, width,
height, layers, samples)
{
}

std::unique_ptr<SWFramebuffer> SWFramebuffer::Create(const SWTexture* color_attachment,
const SWTexture* depth_attachment)
std::unique_ptr<SWFramebuffer> SWFramebuffer::Create(SWTexture* color_attachment,
SWTexture* depth_attachment)
{
if (!ValidateConfig(color_attachment, depth_attachment))
return nullptr;
@@ -175,8 +173,8 @@ std::unique_ptr<SWFramebuffer> SWFramebuffer::Create(const SWTexture* color_atta
const u32 layers = either_attachment->GetLayers();
const u32 samples = either_attachment->GetSamples();

return std::make_unique<SWFramebuffer>(color_format, depth_format, width, height, layers,
samples);
return std::make_unique<SWFramebuffer>(color_attachment, depth_attachment, color_format,
depth_format, width, height, layers, samples);
}

} // namespace SW
@@ -25,9 +25,6 @@ class SWTexture final : public AbstractTexture
const MathUtil::Rectangle<int>& src_rect, u32 src_layer,
u32 src_level, const MathUtil::Rectangle<int>& dst_rect,
u32 dst_layer, u32 dst_level) override;
void ScaleRectangleFromTexture(const AbstractTexture* source,
const MathUtil::Rectangle<int>& srcrect,
const MathUtil::Rectangle<int>& dstrect) override;
void ResolveFromTexture(const AbstractTexture* src, const MathUtil::Rectangle<int>& rect,
u32 layer, u32 level) override;
void Load(u32 level, u32 width, u32 height, u32 row_length, const u8* buffer,
@@ -66,12 +63,13 @@ class SWStagingTexture final : public AbstractStagingTexture
class SWFramebuffer final : public AbstractFramebuffer
{
public:
explicit SWFramebuffer(AbstractTextureFormat color_format, AbstractTextureFormat depth_format,
explicit SWFramebuffer(AbstractTexture* color_attachment, AbstractTexture* depth_attachment,
AbstractTextureFormat color_format, AbstractTextureFormat depth_format,
u32 width, u32 height, u32 layers, u32 samples);
~SWFramebuffer() override = default;

static std::unique_ptr<SWFramebuffer> Create(const SWTexture* color_attachment,
const SWTexture* depth_attachment);
static std::unique_ptr<SWFramebuffer> Create(SWTexture* color_attachment,
SWTexture* depth_attachment);
};

} // namespace SW
@@ -14,6 +14,7 @@
#include "VideoBackends/Software/DebugUtil.h"
#include "VideoBackends/Software/NativeVertexFormat.h"
#include "VideoBackends/Software/Rasterizer.h"
#include "VideoBackends/Software/SWRenderer.h"
#include "VideoBackends/Software/Tev.h"
#include "VideoBackends/Software/TransformUnit.h"

@@ -27,48 +28,9 @@
#include "VideoCommon/VideoConfig.h"
#include "VideoCommon/XFMemory.h"

class NullNativeVertexFormat : public NativeVertexFormat
{
public:
NullNativeVertexFormat(const PortableVertexDeclaration& _vtx_decl) { vtx_decl = _vtx_decl; }
};

std::unique_ptr<NativeVertexFormat>
SWVertexLoader::CreateNativeVertexFormat(const PortableVertexDeclaration& vtx_decl)
{
return std::make_unique<NullNativeVertexFormat>(vtx_decl);
}
SWVertexLoader::SWVertexLoader() = default;

SWVertexLoader::SWVertexLoader()
: m_local_vertex_buffer(MAXVBUFFERSIZE), m_local_index_buffer(MAXIBUFFERSIZE)
{
}

SWVertexLoader::~SWVertexLoader()
{
}

void SWVertexLoader::UploadUtilityUniforms(const void* uniforms, u32 uniforms_size)
{
}

// Points the base/current/end vertex write pointers at the local CPU vertex buffer and restarts
// index generation into the local index buffer. Neither vertex_stride nor cull_all is used.
void SWVertexLoader::ResetBuffer(u32 vertex_stride, bool cull_all)
{
  // Vertex data is written from the start of the local buffer up to its full size.
  m_base_buffer_pointer = m_local_vertex_buffer.data();
  m_cur_buffer_pointer = m_base_buffer_pointer;
  m_end_buffer_pointer = m_cur_buffer_pointer + m_local_vertex_buffer.size();

  IndexGenerator::Start(m_local_index_buffer.data());
}

void SWVertexLoader::CommitBuffer(u32 num_vertices, u32 vertex_stride, u32 num_indices,
u32* out_base_vertex, u32* out_base_index)
{
*out_base_vertex = 0;
*out_base_index = 0;
}

void SWVertexLoader::UploadConstants()
{
}
SWVertexLoader::~SWVertexLoader() = default;

void SWVertexLoader::DrawCurrentBatch(u32 base_index, u32 num_indices, u32 base_vertex)
{
@@ -104,7 +66,7 @@ void SWVertexLoader::DrawCurrentBatch(u32 base_index, u32 num_indices, u32 base_

for (u32 i = 0; i < IndexGenerator::GetIndexLen(); i++)
{
const u16 index = m_local_index_buffer[i];
const u16 index = m_cpu_index_buffer[i];
memset(static_cast<void*>(&m_vertex), 0, sizeof(m_vertex));

// Super Mario Sunshine requires those to be zero for those debug boxes.
@@ -224,8 +186,8 @@ static void ReadVertexAttribute(T* dst, DataReader src, const AttributeFormat& f

void SWVertexLoader::ParseVertex(const PortableVertexDeclaration& vdec, int index)
{
DataReader src(m_local_vertex_buffer.data(),
m_local_vertex_buffer.data() + m_local_vertex_buffer.size());
DataReader src(m_cpu_vertex_buffer.data(),
m_cpu_vertex_buffer.data() + m_cpu_vertex_buffer.size());
src.Skip(index * vdec.stride);

ReadVertexAttribute<float>(&m_vertex.position[0], src, vdec.position, 0, 3, false);
@@ -20,24 +20,12 @@ class SWVertexLoader final : public VertexManagerBase
SWVertexLoader();
~SWVertexLoader();

std::unique_ptr<NativeVertexFormat>
CreateNativeVertexFormat(const PortableVertexDeclaration& vdec) override;

void UploadUtilityUniforms(const void* uniforms, u32 uniforms_size) override;

protected:
void ResetBuffer(u32 vertex_stride, bool cull_all) override;
void CommitBuffer(u32 num_vertices, u32 vertex_stride, u32 num_indices, u32* out_base_vertex,
u32* out_base_index) override;
void UploadConstants() override;
void DrawCurrentBatch(u32 base_index, u32 num_indices, u32 base_vertex) override;

void SetFormat(u8 attributeIndex, u8 primitiveType);
void ParseVertex(const PortableVertexDeclaration& vdec, int index);

std::vector<u8> m_local_vertex_buffer;
std::vector<u16> m_local_index_buffer;

InputVertexData m_vertex;
SetupUnit m_setup_unit;

@@ -10,6 +10,7 @@
#include "Common/Common.h"
#include "Common/CommonTypes.h"
#include "Common/GL/GLContext.h"
#include "Common/MsgHandler.h"

#include "VideoBackends/Software/Clipper.h"
#include "VideoBackends/Software/DebugUtil.h"
@@ -22,14 +23,11 @@
#include "VideoBackends/Software/TextureCache.h"
#include "VideoBackends/Software/VideoBackend.h"

#include "VideoCommon/FramebufferManagerBase.h"
#include "VideoCommon/OnScreenDisplay.h"
#include "VideoCommon/FramebufferManager.h"
#include "VideoCommon/TextureCacheBase.h"
#include "VideoCommon/VideoCommon.h"
#include "VideoCommon/VideoConfig.h"

#define VSYNC_ENABLED 0

namespace SW
{
class PerfQuery : public PerfQueryBase
@@ -59,6 +57,7 @@ void VideoSoftware::InitBackendInfo()
{
g_Config.backend_info.api_type = APIType::Nothing;
g_Config.backend_info.MaxTextureSize = 16384;
g_Config.backend_info.bUsesLowerLeftOrigin = false;
g_Config.backend_info.bSupports3DVision = false;
g_Config.backend_info.bSupportsDualSourceBlend = true;
g_Config.backend_info.bSupportsEarlyZ = true;
@@ -70,6 +69,7 @@ void VideoSoftware::InitBackendInfo()
g_Config.backend_info.bSupportsST3CTextures = false;
g_Config.backend_info.bSupportsBPTCTextures = false;
g_Config.backend_info.bSupportsCopyToVram = false;
g_Config.backend_info.bSupportsLargePoints = false;
g_Config.backend_info.bSupportsFramebufferFetch = false;
g_Config.backend_info.bSupportsBackgroundCompiling = false;
g_Config.backend_info.bSupportsLogicOp = true;
@@ -92,10 +92,22 @@ bool VideoSoftware::Initialize(const WindowSystemInfo& wsi)

g_renderer = std::make_unique<SWRenderer>(std::move(window));
g_vertex_manager = std::make_unique<SWVertexLoader>();
g_shader_cache = std::make_unique<VideoCommon::ShaderCache>();
g_framebuffer_manager = std::make_unique<FramebufferManager>();
g_perf_query = std::make_unique<PerfQuery>();
g_texture_cache = std::make_unique<TextureCache>();
g_shader_cache = std::make_unique<VideoCommon::ShaderCache>();
return g_renderer->Initialize() && g_shader_cache->Initialize();

if (!g_vertex_manager->Initialize() || !g_shader_cache->Initialize() ||
!g_renderer->Initialize() || !g_framebuffer_manager->Initialize() ||
!g_texture_cache->Initialize())
{
PanicAlert("Failed to initialize renderer classes");
Shutdown();
return false;
}

g_shader_cache->InitializeShaderCache();
return true;
}

void VideoSoftware::Shutdown()
@@ -107,9 +119,10 @@ void VideoSoftware::Shutdown()
g_renderer->Shutdown();

DebugUtil::Shutdown();
g_framebuffer_manager.reset();
g_texture_cache.reset();
g_perf_query.reset();
g_framebuffer_manager.reset();
g_shader_cache.reset();
g_vertex_manager.reset();
g_renderer.reset();
ShutdownShared();
@@ -9,27 +9,19 @@ namespace SW
{
class TextureCache : public TextureCacheBase
{
public:
bool CompileShaders() override { return true; }
void DeleteShaders() override {}
void ConvertTexture(TCacheEntry* entry, TCacheEntry* unconverted, const void* palette,
TLUTFormat format) override
{
}
protected:
void CopyEFB(AbstractStagingTexture* dst, const EFBCopyParams& params, u32 native_width,
u32 bytes_per_row, u32 num_blocks_y, u32 memory_stride, const EFBRectangle& src_rect,
bool scale_by_half, float y_scale, float gamma, bool clamp_top, bool clamp_bottom,
const CopyFilterCoefficientArray& filter_coefficients) override
const EFBCopyFilterCoefficients& filter_coefficients) override
{
TextureEncoder::Encode(dst, params, native_width, bytes_per_row, num_blocks_y, memory_stride,
src_rect, scale_by_half, y_scale, gamma);
}

private:
void CopyEFBToCacheEntry(TCacheEntry* entry, bool is_depth_copy, const EFBRectangle& src_rect,
bool scale_by_half, EFBCopyFormat dst_format, bool is_intensity,
float gamma, bool clamp_top, bool clamp_bottom,
const CopyFilterCoefficientArray& filter_coefficients) override
const EFBCopyFilterCoefficients& filter_coefficients) override
{
// TODO: If we ever want to "fake" vram textures, we would need to implement this
}
@@ -13,7 +13,6 @@
#include "VideoBackends/Vulkan/Renderer.h"
#include "VideoBackends/Vulkan/StagingBuffer.h"
#include "VideoBackends/Vulkan/StateTracker.h"
#include "VideoBackends/Vulkan/Util.h"
#include "VideoBackends/Vulkan/VulkanContext.h"

namespace Vulkan
@@ -33,7 +32,7 @@ BoundingBox::~BoundingBox()

bool BoundingBox::Initialize()
{
if (!g_vulkan_context->SupportsBoundingBox())
if (!g_ActiveConfig.backend_info.bSupportsBBox)
{
WARN_LOG(VIDEO, "Vulkan: Bounding box is unsupported by your device.");
return true;
@@ -45,6 +44,8 @@ bool BoundingBox::Initialize()
if (!CreateReadbackBuffer())
return false;

// Bind bounding box to state tracker
StateTracker::GetInstance()->SetSSBO(m_gpu_buffer, 0, BUFFER_SIZE);
return true;
}

@@ -79,7 +80,7 @@ void BoundingBox::Flush()
StateTracker::GetInstance()->EndRenderPass();

// Ensure GPU buffer is in a state where it can be transferred to.
Util::BufferMemoryBarrier(
StagingBuffer::BufferMemoryBarrier(
g_command_buffer_mgr->GetCurrentCommandBuffer(), m_gpu_buffer,
VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_TRANSFER_WRITE_BIT, 0,
BUFFER_SIZE, VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT);
@@ -95,7 +96,7 @@ void BoundingBox::Flush()
// Restore fragment shader access to the buffer.
if (updated_buffer)
{
Util::BufferMemoryBarrier(
StagingBuffer::BufferMemoryBarrier(
g_command_buffer_mgr->GetCurrentCommandBuffer(), m_gpu_buffer, VK_ACCESS_TRANSFER_WRITE_BIT,
VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT, 0, BUFFER_SIZE,
VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT);
@@ -219,7 +220,7 @@ void BoundingBox::Readback()
StateTracker::GetInstance()->EndRenderPass();

// Ensure all writes are completed to the GPU buffer prior to the transfer.
Util::BufferMemoryBarrier(
StagingBuffer::BufferMemoryBarrier(
g_command_buffer_mgr->GetCurrentCommandBuffer(), m_gpu_buffer,
VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_TRANSFER_READ_BIT, 0,
BUFFER_SIZE, VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT);
@@ -233,15 +234,15 @@ void BoundingBox::Readback()
m_readback_buffer->GetBuffer(), 1, &region);

// Restore GPU buffer access.
Util::BufferMemoryBarrier(g_command_buffer_mgr->GetCurrentCommandBuffer(), m_gpu_buffer,
VK_ACCESS_TRANSFER_READ_BIT,
VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT, 0, BUFFER_SIZE,
VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT);
StagingBuffer::BufferMemoryBarrier(
g_command_buffer_mgr->GetCurrentCommandBuffer(), m_gpu_buffer, VK_ACCESS_TRANSFER_READ_BIT,
VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT, 0, BUFFER_SIZE,
VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT);
m_readback_buffer->FlushGPUCache(g_command_buffer_mgr->GetCurrentCommandBuffer(),
VK_ACCESS_TRANSFER_WRITE_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT);

// Wait until these commands complete.
Util::ExecuteCurrentCommandsAndRestoreState(false, true);
Renderer::GetInstance()->ExecuteCommandBuffer(false, true);

// Cache is now valid.
m_readback_buffer->InvalidateCPUCache();
@@ -24,9 +24,6 @@ class BoundingBox

bool Initialize();

VkBuffer GetGPUBuffer() const { return m_gpu_buffer; }
VkDeviceSize GetGPUBufferOffset() const { return 0; }
VkDeviceSize GetGPUBufferSize() const { return BUFFER_SIZE; }
s32 Get(size_t index);
void Set(size_t index, s32 value);

@@ -1,21 +1,14 @@
add_library(videovulkan
BoundingBox.cpp
CommandBufferManager.cpp
FramebufferManager.cpp
ObjectCache.cpp
PerfQuery.cpp
PostProcessing.cpp
Renderer.cpp
ShaderCache.cpp
ShaderCompiler.cpp
StateTracker.cpp
StagingBuffer.cpp
StreamBuffer.cpp
SwapChain.cpp
Texture2D.cpp
TextureCache.cpp
TextureConverter.cpp
Util.cpp
VertexFormat.cpp
VertexManager.cpp
VKPipeline.cpp
@@ -44,12 +44,16 @@ bool CommandBufferManager::Initialize()

bool CommandBufferManager::CreateCommandBuffers()
{
static constexpr VkSemaphoreCreateInfo semaphore_create_info = {
VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO, nullptr, 0};

VkDevice device = g_vulkan_context->GetDevice();
VkResult res;

for (FrameResources& resources : m_frame_resources)
{
resources.init_command_buffer_used = false;
resources.semaphore_used = false;
resources.needs_fence_wait = false;

VkCommandPoolCreateInfo pool_info = {VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO, nullptr, 0,
@@ -83,6 +87,13 @@ bool CommandBufferManager::CreateCommandBuffers()
return false;
}

res = vkCreateSemaphore(device, &semaphore_create_info, nullptr, &resources.semaphore);
if (res != VK_SUCCESS)
{
LOG_VULKAN_ERROR(res, "vkCreateSemaphore failed: ");
return false;
}

// TODO: A better way to choose the number of descriptors.
VkDescriptorPoolSize pool_sizes[] = {{VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC, 500000},
{VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, 500000},
@@ -105,9 +116,16 @@ bool CommandBufferManager::CreateCommandBuffers()
}
}

res = vkCreateSemaphore(device, &semaphore_create_info, nullptr, &m_present_semaphore);
if (res != VK_SUCCESS)
{
LOG_VULKAN_ERROR(res, "vkCreateSemaphore failed: ");
return false;
}

// Activate the first command buffer. ActivateCommandBuffer moves forward, so start with the last
m_current_frame = m_frame_resources.size() - 1;
ActivateCommandBuffer();
m_current_frame = static_cast<u32>(m_frame_resources.size()) - 1;
BeginCommandBuffer();
return true;
}

@@ -122,28 +140,23 @@ void CommandBufferManager::DestroyCommandBuffers()
// We destroy the command pool first, to avoid any warnings from the validation layers about
// objects which are pending destruction being in-use.
if (resources.command_pool != VK_NULL_HANDLE)
{
vkDestroyCommandPool(device, resources.command_pool, nullptr);
resources.command_pool = VK_NULL_HANDLE;
}

// Destroy any pending objects.
for (auto& it : resources.cleanup_resources)
it();
resources.cleanup_resources.clear();

if (resources.semaphore != VK_NULL_HANDLE)
vkDestroySemaphore(device, resources.semaphore, nullptr);

if (resources.fence != VK_NULL_HANDLE)
{
vkDestroyFence(device, resources.fence, nullptr);
resources.fence = VK_NULL_HANDLE;
}

if (resources.descriptor_pool != VK_NULL_HANDLE)
{
vkDestroyDescriptorPool(device, resources.descriptor_pool, nullptr);
resources.descriptor_pool = VK_NULL_HANDLE;
}
}

vkDestroySemaphore(device, m_present_semaphore, nullptr);
}

VkDescriptorSet CommandBufferManager::AllocateDescriptorSet(VkDescriptorSetLayout set_layout)
@@ -183,22 +196,14 @@ bool CommandBufferManager::CreateSubmitThread()
m_pending_submits.pop_front();
}

SubmitCommandBuffer(submit.index, submit.wait_semaphore, submit.signal_semaphore,
submit.present_swap_chain, submit.present_image_index);
SubmitCommandBuffer(submit.command_buffer_index, submit.present_swap_chain,
submit.present_image_index);
});
});

return true;
}

// Blocks on the submit semaphore before a command buffer is handed off for submission.
void CommandBufferManager::PrepareToSubmitCommandBuffer()
{
// Grab the semaphore before submitting command buffer either on-thread or off-thread.
// This prevents a race from occurring where a second command buffer is executed
// before the worker thread has woken and executed the first one yet.
m_submit_semaphore.Wait();
}

void CommandBufferManager::WaitForWorkerThreadIdle()
{
// Drain the semaphore, then allow another request in the future.
@@ -215,8 +220,8 @@ void CommandBufferManager::WaitForGPUIdle()
void CommandBufferManager::WaitForFence(VkFence fence)
{
// Find the command buffer that this fence corresponds to.
size_t command_buffer_index = 0;
for (; command_buffer_index < m_frame_resources.size(); command_buffer_index++)
u32 command_buffer_index = 0;
for (; command_buffer_index < static_cast<u32>(m_frame_resources.size()); command_buffer_index++)
{
if (m_frame_resources[command_buffer_index].fence == fence)
break;
@@ -227,6 +232,9 @@ void CommandBufferManager::WaitForFence(VkFence fence)
if (!m_frame_resources[command_buffer_index].needs_fence_wait)
return;

// Ensure this command buffer has been submitted.
WaitForWorkerThreadIdle();

// Wait for this command buffer to be completed.
VkResult res =
vkWaitForFences(g_vulkan_context->GetDevice(), 1,
@@ -240,19 +248,11 @@ void CommandBufferManager::WaitForFence(VkFence fence)
}

void CommandBufferManager::SubmitCommandBuffer(bool submit_on_worker_thread,
VkSemaphore wait_semaphore,
VkSemaphore signal_semaphore,
VkSwapchainKHR present_swap_chain,
uint32_t present_image_index)
{
FrameResources& resources = m_frame_resources[m_current_frame];

// Fire fence tracking callbacks. This can't happen on the worker thread.
// We invoke these before submitting so that any last-minute commands can be added.
for (const auto& iter : m_fence_point_callbacks)
iter.second.first(resources.command_buffers[1], resources.fence);

// End the current command buffer.
FrameResources& resources = m_frame_resources[m_current_frame];
for (VkCommandBuffer command_buffer : resources.command_buffers)
{
VkResult res = vkEndCommandBuffer(command_buffer);
@@ -266,14 +266,18 @@ void CommandBufferManager::SubmitCommandBuffer(bool submit_on_worker_thread,
// This command buffer now has commands, so can't be re-used without waiting.
resources.needs_fence_wait = true;

// Grab the semaphore before submitting command buffer either on-thread or off-thread.
// This prevents a race from occurring where a second command buffer is executed
// before the worker thread has woken and executed the first one yet.
m_submit_semaphore.Wait();

// Submitting off-thread?
if (m_use_threaded_submission && submit_on_worker_thread)
{
// Push to the pending submit queue.
{
std::lock_guard<std::mutex> guard(m_pending_submit_lock);
m_pending_submits.push_back({m_current_frame, wait_semaphore, signal_semaphore,
present_swap_chain, present_image_index});
m_pending_submits.push_back({present_swap_chain, present_image_index, m_current_frame});
}

// Wake up the worker thread for a single iteration.
@@ -282,17 +286,18 @@ void CommandBufferManager::SubmitCommandBuffer(bool submit_on_worker_thread,
else
{
// Pass through to normal submission path.
SubmitCommandBuffer(m_current_frame, wait_semaphore, signal_semaphore, present_swap_chain,
present_image_index);
SubmitCommandBuffer(m_current_frame, present_swap_chain, present_image_index);
}

// Switch to next cmdbuffer.
BeginCommandBuffer();
}

void CommandBufferManager::SubmitCommandBuffer(size_t index, VkSemaphore wait_semaphore,
VkSemaphore signal_semaphore,
void CommandBufferManager::SubmitCommandBuffer(u32 command_buffer_index,
VkSwapchainKHR present_swap_chain,
uint32_t present_image_index)
u32 present_image_index)
{
FrameResources& resources = m_frame_resources[index];
FrameResources& resources = m_frame_resources[command_buffer_index];

// This may be executed on the worker thread, so don't modify any state of the manager class.
uint32_t wait_bits = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT;
@@ -307,22 +312,22 @@ void CommandBufferManager::SubmitCommandBuffer(size_t index, VkSemaphore wait_se
nullptr};

// If the init command buffer did not have any commands recorded, don't submit it.
if (!m_frame_resources[index].init_command_buffer_used)
if (!resources.init_command_buffer_used)
{
submit_info.commandBufferCount = 1;
submit_info.pCommandBuffers = &m_frame_resources[index].command_buffers[1];
submit_info.pCommandBuffers = &resources.command_buffers[1];
}

if (wait_semaphore != VK_NULL_HANDLE)
// semaphore_used is a bool flag rather than a Vulkan handle, so test it directly instead of
// comparing it against VK_NULL_HANDLE.
if (resources.semaphore_used)
{
submit_info.pWaitSemaphores = &wait_semaphore;
submit_info.pWaitSemaphores = &resources.semaphore;
submit_info.waitSemaphoreCount = 1;
}

if (signal_semaphore != VK_NULL_HANDLE)
if (present_swap_chain != VK_NULL_HANDLE)
{
submit_info.signalSemaphoreCount = 1;
submit_info.pSignalSemaphores = &signal_semaphore;
submit_info.pSignalSemaphores = &m_present_semaphore;
}

VkResult res =
@@ -337,11 +342,10 @@ void CommandBufferManager::SubmitCommandBuffer(size_t index, VkSemaphore wait_se
if (present_swap_chain != VK_NULL_HANDLE)
{
// Should have a signal semaphore.
ASSERT(signal_semaphore != VK_NULL_HANDLE);
VkPresentInfoKHR present_info = {VK_STRUCTURE_TYPE_PRESENT_INFO_KHR,
nullptr,
1,
&signal_semaphore,
&m_present_semaphore,
1,
&present_swap_chain,
&present_image_index,
@@ -361,15 +365,15 @@ void CommandBufferManager::SubmitCommandBuffer(size_t index, VkSemaphore wait_se
m_submit_semaphore.Post();
}

void CommandBufferManager::OnCommandBufferExecuted(size_t index)
void CommandBufferManager::OnCommandBufferExecuted(u32 index)
{
FrameResources& resources = m_frame_resources[index];

// Fire fence tracking callbacks.
for (auto iter = m_fence_point_callbacks.begin(); iter != m_fence_point_callbacks.end();)
for (auto iter = m_fence_callbacks.begin(); iter != m_fence_callbacks.end();)
{
auto backup_iter = iter++;
backup_iter->second.second(resources.fence);
backup_iter->second(resources.fence);
}

// Clean up all objects pending destruction on this command buffer
@@ -378,7 +382,7 @@ void CommandBufferManager::OnCommandBufferExecuted(size_t index)
resources.cleanup_resources.clear();
}

void CommandBufferManager::ActivateCommandBuffer()
void CommandBufferManager::BeginCommandBuffer()
{
// Move to the next command buffer.
m_current_frame = (m_current_frame + 1) % NUM_COMMAND_BUFFERS;
@@ -422,19 +426,7 @@ void CommandBufferManager::ActivateCommandBuffer()

// Reset upload command buffer state
resources.init_command_buffer_used = false;
}

void CommandBufferManager::ExecuteCommandBuffer(bool submit_off_thread, bool wait_for_completion)
{
VkFence pending_fence = GetCurrentCommandBufferFence();

// If we're waiting for completion, don't bother waking the worker thread.
PrepareToSubmitCommandBuffer();
SubmitCommandBuffer((submit_off_thread && wait_for_completion));
ActivateCommandBuffer();

if (wait_for_completion)
WaitForFence(pending_fence);
resources.semaphore_used = false;
}

void CommandBufferManager::DeferBufferDestruction(VkBuffer object)
@@ -479,20 +471,18 @@ void CommandBufferManager::DeferImageViewDestruction(VkImageView object)
[object]() { vkDestroyImageView(g_vulkan_context->GetDevice(), object, nullptr); });
}

void CommandBufferManager::AddFencePointCallback(
const void* key, const CommandBufferQueuedCallback& queued_callback,
const CommandBufferExecutedCallback& executed_callback)
void CommandBufferManager::AddFenceSignaledCallback(const void* key, FenceSignaledCallback callback)
{
// Shouldn't be adding twice.
ASSERT(m_fence_point_callbacks.find(key) == m_fence_point_callbacks.end());
m_fence_point_callbacks.emplace(key, std::make_pair(queued_callback, executed_callback));
ASSERT(m_fence_callbacks.find(key) == m_fence_callbacks.end());
m_fence_callbacks.emplace(key, std::move(callback));
}

void CommandBufferManager::RemoveFencePointCallback(const void* key)
void CommandBufferManager::RemoveFenceSignaledCallback(const void* key)
{
auto iter = m_fence_point_callbacks.find(key);
ASSERT(iter != m_fence_point_callbacks.end());
m_fence_point_callbacks.erase(iter);
auto iter = m_fence_callbacks.find(key);
ASSERT(iter != m_fence_callbacks.end());
m_fence_callbacks.erase(iter);
}

std::unique_ptr<CommandBufferManager> g_command_buffer_mgr;
@@ -22,7 +22,6 @@
#include "VideoCommon/VideoCommon.h"

#include "VideoBackends/Vulkan/Constants.h"
#include "VideoBackends/Vulkan/Util.h"

namespace Vulkan
{
@@ -55,8 +54,14 @@ class CommandBufferManager
// Gets the fence that will be signaled when the currently executing command buffer is
// queued and executed. Do not wait for this fence before the buffer is executed.
VkFence GetCurrentCommandBufferFence() const { return m_frame_resources[m_current_frame].fence; }
// Ensure the worker thread has submitted the previous frame's command buffer.
void PrepareToSubmitCommandBuffer();

// Hands out the semaphore that belongs to the current command buffer and flags it as used.
// It can be used to ensure the swap chain image is ready before the command buffer executes.
VkSemaphore GetCurrentCommandBufferSemaphore()
{
  auto& current_resources = m_frame_resources[m_current_frame];
  current_resources.semaphore_used = true;
  return current_resources.semaphore;
}

// Ensure that the worker thread has submitted any previous command buffers and is idle.
void WaitForWorkerThreadIdle();
@@ -70,17 +75,12 @@ class CommandBufferManager
void WaitForFence(VkFence fence);

void SubmitCommandBuffer(bool submit_on_worker_thread,
VkSemaphore wait_semaphore = VK_NULL_HANDLE,
VkSemaphore signal_semaphore = VK_NULL_HANDLE,
VkSwapchainKHR present_swap_chain = VK_NULL_HANDLE,
uint32_t present_image_index = 0xFFFFFFFF);

void ActivateCommandBuffer();

void ExecuteCommandBuffer(bool submit_off_thread, bool wait_for_completion);

// Was the last present submitted to the queue a failure? If so, we must recreate our swapchain.
bool CheckLastPresentFail() { return m_present_failed_flag.TestAndClear(); }

// Schedule a vulkan resource for destruction later on. This will occur when the command buffer
// is next re-used, and the GPU has finished working with the specified resource.
void DeferBufferDestruction(VkBuffer object);
@@ -93,44 +93,42 @@ class CommandBufferManager
// Instruct the manager to fire the specified callback when a fence is flagged to be signaled.
// This happens when command buffers are executed, and can be tested if signaled, which means
// that all commands up to the point when the callback was fired have completed.
using CommandBufferQueuedCallback = std::function<void(VkCommandBuffer, VkFence)>;
using CommandBufferExecutedCallback = std::function<void(VkFence)>;

void AddFencePointCallback(const void* key, const CommandBufferQueuedCallback& queued_callback,
const CommandBufferExecutedCallback& executed_callback);

void RemoveFencePointCallback(const void* key);
using FenceSignaledCallback = std::function<void(VkFence)>;
void AddFenceSignaledCallback(const void* key, FenceSignaledCallback callback);
void RemoveFenceSignaledCallback(const void* key);

private:
bool CreateCommandBuffers();
void DestroyCommandBuffers();

bool CreateSubmitThread();

void SubmitCommandBuffer(size_t index, VkSemaphore wait_semaphore, VkSemaphore signal_semaphore,
VkSwapchainKHR present_swap_chain, uint32_t present_image_index);
void SubmitCommandBuffer(u32 command_buffer_index, VkSwapchainKHR present_swap_chain,
u32 present_image_index);
void BeginCommandBuffer();

void OnCommandBufferExecuted(size_t index);
void OnCommandBufferExecuted(u32 index);

struct FrameResources
{
// [0] - Init (upload) command buffer, [1] - draw command buffer
VkCommandPool command_pool;
std::array<VkCommandBuffer, 2> command_buffers;
VkDescriptorPool descriptor_pool;
VkFence fence;
bool init_command_buffer_used;
bool needs_fence_wait;
VkCommandPool command_pool = VK_NULL_HANDLE;
std::array<VkCommandBuffer, 2> command_buffers = {};
VkDescriptorPool descriptor_pool = VK_NULL_HANDLE;
VkFence fence = VK_NULL_HANDLE;
VkSemaphore semaphore = VK_NULL_HANDLE;
bool init_command_buffer_used = false;
bool semaphore_used = false;
bool needs_fence_wait = false;

std::vector<std::function<void()>> cleanup_resources;
};

std::array<FrameResources, NUM_COMMAND_BUFFERS> m_frame_resources = {};
size_t m_current_frame;
std::array<FrameResources, NUM_COMMAND_BUFFERS> m_frame_resources;
u32 m_current_frame;

// callbacks when a fence point is set
std::map<const void*, std::pair<CommandBufferQueuedCallback, CommandBufferExecutedCallback>>
m_fence_point_callbacks;
std::map<const void*, FenceSignaledCallback> m_fence_callbacks;

// Threaded command buffer execution
// Semaphore determines when a command buffer can be queued
@@ -139,12 +137,11 @@ class CommandBufferManager
std::unique_ptr<Common::BlockingLoop> m_submit_loop;
struct PendingCommandBufferSubmit
{
size_t index;
VkSemaphore wait_semaphore;
VkSemaphore signal_semaphore;
VkSwapchainKHR present_swap_chain;
uint32_t present_image_index;
u32 present_image_index;
u32 command_buffer_index;
};
VkSemaphore m_present_semaphore = VK_NULL_HANDLE;
std::deque<PendingCommandBufferSubmit> m_pending_submits;
std::mutex m_pending_submit_lock;
Common::Flag m_present_failed_flag;
@@ -26,48 +26,36 @@ enum STAGING_BUFFER_TYPE
// Descriptor set layouts
enum DESCRIPTOR_SET_LAYOUT
{
DESCRIPTOR_SET_LAYOUT_SINGLE_UNIFORM_BUFFER,
DESCRIPTOR_SET_LAYOUT_PER_STAGE_UNIFORM_BUFFERS,
DESCRIPTOR_SET_LAYOUT_PIXEL_SHADER_SAMPLERS,
DESCRIPTOR_SET_LAYOUT_SHADER_STORAGE_BUFFERS,
DESCRIPTOR_SET_LAYOUT_TEXEL_BUFFERS,
DESCRIPTOR_SET_LAYOUT_STANDARD_UNIFORM_BUFFERS,
DESCRIPTOR_SET_LAYOUT_STANDARD_SAMPLERS,
DESCRIPTOR_SET_LAYOUT_STANDARD_SHADER_STORAGE_BUFFERS,
DESCRIPTOR_SET_LAYOUT_UTILITY_UNIFORM_BUFFER,
DESCRIPTOR_SET_LAYOUT_UTILITY_SAMPLERS,
DESCRIPTOR_SET_LAYOUT_COMPUTE,
NUM_DESCRIPTOR_SET_LAYOUTS
};

// Descriptor set bind points
enum DESCRIPTOR_SET_BIND_POINT
{
DESCRIPTOR_SET_BIND_POINT_UNIFORM_BUFFERS,
DESCRIPTOR_SET_BIND_POINT_PIXEL_SHADER_SAMPLERS,
DESCRIPTOR_SET_BIND_POINT_STORAGE_OR_TEXEL_BUFFER,
NUM_DESCRIPTOR_SET_BIND_POINTS
};

// We use four pipeline layouts:
// - Standard
// - Per-stage UBO (VS/GS/PS, VS constants accessible from PS)
// - 8 combined image samplers (accessible from PS)
// - 1 SSBO accessible from PS if supported
// - Push Constant
// - Same as standard, plus 128 bytes of push constants, accessible from all stages.
// - Texture Decoding
// - Same as push constant, plus a single texel buffer accessible from PS.
// - Per-stage UBO (VS/GS/PS, VS constants accessible from PS) [set=0, binding=0-2]
// - 8 combined image samplers (accessible from PS) [set=1, binding=0-7]
// - 1 SSBO accessible from PS if supported [set=2, binding=0]
// - Utility
// - 1 combined UBO, accessible from VS/GS/PS [set=0, binding=0]
// - 8 combined image samplers (accessible from PS) [set=1, binding=0-7]
// - 1 texel buffer (accessible from PS) [set=1, binding=8]
// - Compute
// - 1 uniform buffer [set=0, binding=0]
// - 4 combined image samplers [set=0, binding=1-4]
// - 1 texel buffer [set=0, binding=5]
// - 1 storage image [set=0, binding=6]
// - 128 bytes of push constants
// - 2 combined image samplers [set=0, binding=1-2]
// - 2 texel buffers [set=0, binding=3-4]
// - 1 storage image [set=0, binding=5]
//
// All four pipeline layouts share the first two descriptor sets (uniform buffers, PS samplers).
// The third descriptor set (see bind points above) is used for storage or texel buffers.
//
enum PIPELINE_LAYOUT
{
PIPELINE_LAYOUT_STANDARD,
PIPELINE_LAYOUT_PUSH_CONSTANT,
PIPELINE_LAYOUT_TEXTURE_CONVERSION,
PIPELINE_LAYOUT_UTILITY,
PIPELINE_LAYOUT_COMPUTE,
NUM_PIPELINE_LAYOUTS
@@ -83,53 +71,22 @@ enum UNIFORM_BUFFER_DESCRIPTOR_SET_BINDING
};

// Maximum number of attributes per vertex (we don't have any more than this?)
constexpr size_t MAX_VERTEX_ATTRIBUTES = 16;
constexpr u32 MAX_VERTEX_ATTRIBUTES = 16;

// Number of pixel shader texture slots
constexpr size_t NUM_PIXEL_SHADER_SAMPLERS = 8;

// Total number of binding points in the pipeline layout
constexpr size_t TOTAL_PIPELINE_BINDING_POINTS =
NUM_UBO_DESCRIPTOR_SET_BINDINGS + NUM_PIXEL_SHADER_SAMPLERS + 1;
constexpr u32 NUM_PIXEL_SHADER_SAMPLERS = 8;
constexpr u32 NUM_COMPUTE_SHADER_SAMPLERS = 2;

// Format of EFB textures
constexpr VkFormat EFB_COLOR_TEXTURE_FORMAT = VK_FORMAT_R8G8B8A8_UNORM;
constexpr VkFormat EFB_DEPTH_TEXTURE_FORMAT = VK_FORMAT_D32_SFLOAT;
constexpr VkFormat EFB_DEPTH_AS_COLOR_TEXTURE_FORMAT = VK_FORMAT_R32_SFLOAT;

// Format of texturecache textures
constexpr VkFormat TEXTURECACHE_TEXTURE_FORMAT = VK_FORMAT_R8G8B8A8_UNORM;
// Number of texel buffer binding points.
constexpr u32 NUM_COMPUTE_TEXEL_BUFFERS = 2;

// Textures that don't fit into this buffer will be uploaded with a separate buffer (see below).
constexpr size_t INITIAL_TEXTURE_UPLOAD_BUFFER_SIZE = 16 * 1024 * 1024;
constexpr size_t MAXIMUM_TEXTURE_UPLOAD_BUFFER_SIZE = 64 * 1024 * 1024;
constexpr u32 TEXTURE_UPLOAD_BUFFER_SIZE = 32 * 1024 * 1024;

// Textures greater than 1024*1024 will be put in staging textures that are released after
// execution instead. A 2048x2048 texture is 16MB, and we'd only fit four of these in our
// streaming buffer and be blocking frequently. Games are unlikely to have textures this
// large anyway, so it's only really an issue for HD texture packs, and memory is not
// a limiting factor in these scenarios anyway.
constexpr size_t STAGING_TEXTURE_UPLOAD_THRESHOLD = 1024 * 1024 * 8;

// Streaming uniform buffer size
constexpr size_t INITIAL_UNIFORM_STREAM_BUFFER_SIZE = 16 * 1024 * 1024;
constexpr size_t MAXIMUM_UNIFORM_STREAM_BUFFER_SIZE = 32 * 1024 * 1024;

// Texel buffer size for palette and texture decoding.
constexpr size_t TEXTURE_CONVERSION_TEXEL_BUFFER_SIZE = 8 * 1024 * 1024;

// Push constant buffer size for utility shaders
constexpr u32 PUSH_CONSTANT_BUFFER_SIZE = 128;

// Minimum number of draw calls per command buffer when attempting to preempt a readback operation.
constexpr u32 MINIMUM_DRAW_CALLS_PER_COMMAND_BUFFER_FOR_READBACK = 10;

// Multisampling state info that we don't expose in VideoCommon.
union MultisamplingState
{
BitField<0, 5, u32> samples; // 1-16
BitField<5, 1, u32> per_sample_shading; // SSAA
u32 hex;
};

constexpr u32 STAGING_TEXTURE_UPLOAD_THRESHOLD = 1024 * 1024 * 4;
} // namespace Vulkan

This file was deleted.

This file was deleted.

Large diffs are not rendered by default.

@@ -16,7 +16,6 @@
#include "Common/LinearDiskCache.h"

#include "VideoBackends/Vulkan/Constants.h"
#include "VideoBackends/Vulkan/Texture2D.h"

#include "VideoCommon/GeometryShaderGen.h"
#include "VideoCommon/PixelShaderGen.h"
@@ -27,6 +26,7 @@ namespace Vulkan
{
class CommandBufferManager;
class VertexFormat;
class VKTexture;
class StreamBuffer;

class ObjectCache
@@ -35,66 +35,63 @@ class ObjectCache
ObjectCache();
~ObjectCache();

// Perform at startup, create descriptor layouts, compiles all static shaders.
bool Initialize();
void Shutdown();

// Descriptor set layout accessor. Used for allocating descriptor sets.
VkDescriptorSetLayout GetDescriptorSetLayout(DESCRIPTOR_SET_LAYOUT layout) const
{
return m_descriptor_set_layouts[layout];
}

// Pipeline layout accessor. Used to fill in required field in PipelineInfo.
VkPipelineLayout GetPipelineLayout(PIPELINE_LAYOUT layout) const
{
return m_pipeline_layouts[layout];
}
// Shared utility shader resources
VertexFormat* GetUtilityShaderVertexFormat() const
{
return m_utility_shader_vertex_format.get();
}
StreamBuffer* GetUtilityShaderVertexBuffer() const
{
return m_utility_shader_vertex_buffer.get();
}
StreamBuffer* GetUtilityShaderUniformBuffer() const
{
return m_utility_shader_uniform_buffer.get();
}

// Staging buffer for textures.
StreamBuffer* GetTextureUploadBuffer() const { return m_texture_upload_buffer.get(); }

// Static samplers
VkSampler GetPointSampler() const { return m_point_sampler; }
VkSampler GetLinearSampler() const { return m_linear_sampler; }
VkSampler GetSampler(const SamplerState& info);

// Dummy image for samplers that are unbound
Texture2D* GetDummyImage() const { return m_dummy_texture.get(); }
VkImageView GetDummyImageView() const { return m_dummy_texture->GetView(); }
// Render pass cache.
VkRenderPass GetRenderPass(VkFormat color_format, VkFormat depth_format, u32 multisamples,
VkAttachmentLoadOp load_op);

// Perform at startup, create descriptor layouts, compiles all static shaders.
bool Initialize();
// Pipeline cache. Used when creating pipelines for drivers to store compiled programs.
VkPipelineCache GetPipelineCache() const { return m_pipeline_cache; }

// Clear sampler cache, use when anisotropy mode changes
// WARNING: Ensure none of the objects from here are in use when calling
void ClearSamplerCache();

// Saves the pipeline cache to disk. Call when shutting down.
void SavePipelineCache();

// Reload pipeline cache. Call when host config changes.
void ReloadPipelineCache();

private:
bool CreateDescriptorSetLayouts();
void DestroyDescriptorSetLayouts();
bool CreatePipelineLayouts();
void DestroyPipelineLayouts();
bool CreateUtilityShaderVertexFormat();
bool CreateStaticSamplers();
void DestroySamplers();
void DestroyRenderPassCache();
bool CreatePipelineCache();
bool LoadPipelineCache();
bool ValidatePipelineCache(const u8* data, size_t data_length);
void DestroyPipelineCache();

std::array<VkDescriptorSetLayout, NUM_DESCRIPTOR_SET_LAYOUTS> m_descriptor_set_layouts = {};
std::array<VkPipelineLayout, NUM_PIPELINE_LAYOUTS> m_pipeline_layouts = {};

std::unique_ptr<VertexFormat> m_utility_shader_vertex_format;
std::unique_ptr<StreamBuffer> m_utility_shader_vertex_buffer;
std::unique_ptr<StreamBuffer> m_utility_shader_uniform_buffer;
std::unique_ptr<StreamBuffer> m_texture_upload_buffer;

VkSampler m_point_sampler = VK_NULL_HANDLE;
@@ -103,11 +100,15 @@ class ObjectCache
std::map<SamplerState, VkSampler> m_sampler_cache;

// Dummy image for samplers that are unbound
std::unique_ptr<Texture2D> m_dummy_texture;
std::unique_ptr<VKTexture> m_dummy_texture;

// Render pass cache
using RenderPassCacheKey = std::tuple<VkFormat, VkFormat, u32, VkAttachmentLoadOp>;
std::map<RenderPassCacheKey, VkRenderPass> m_render_pass_cache;

// pipeline cache
VkPipelineCache m_pipeline_cache = VK_NULL_HANDLE;
std::string m_pipeline_cache_filename;
};

extern std::unique_ptr<ObjectCache> g_object_cache;
@@ -13,20 +13,18 @@
#include "Common/MsgHandler.h"

#include "VideoBackends/Vulkan/CommandBufferManager.h"
#include "VideoBackends/Vulkan/Renderer.h"
#include "VideoBackends/Vulkan/StagingBuffer.h"
#include "VideoBackends/Vulkan/StateTracker.h"
#include "VideoBackends/Vulkan/Util.h"
#include "VideoBackends/Vulkan/VulkanContext.h"

namespace Vulkan
{
// The constructor does no work of its own, so it is defaulted.
PerfQuery::PerfQuery() = default;

PerfQuery::~PerfQuery()
{
g_command_buffer_mgr->RemoveFencePointCallback(this);
g_command_buffer_mgr->RemoveFenceSignaledCallback(this);

if (m_query_pool != VK_NULL_HANDLE)
vkDestroyQueryPool(g_vulkan_context->GetDevice(), m_query_pool, nullptr);
@@ -51,11 +49,8 @@ bool PerfQuery::Initialize()
return false;
}

g_command_buffer_mgr->AddFencePointCallback(
this,
std::bind(&PerfQuery::OnCommandBufferQueued, this, std::placeholders::_1,
std::placeholders::_2),
std::bind(&PerfQuery::OnCommandBufferExecuted, this, std::placeholders::_1));
g_command_buffer_mgr->AddFenceSignaledCallback(
this, std::bind(&PerfQuery::OnFenceSignaled, this, std::placeholders::_1));

return true;
}
@@ -92,9 +87,6 @@ void PerfQuery::EnableQuery(PerfQueryGroup type)
// TODO: Is this needed?
StateTracker::GetInstance()->BeginRenderPass();
vkCmdBeginQuery(g_command_buffer_mgr->GetCurrentCommandBuffer(), m_query_pool, index, flags);

// Prevent background command buffer submission while the query is active.
StateTracker::GetInstance()->SetBackgroundCommandBufferExecution(false);
}
}

@@ -105,8 +97,6 @@ void PerfQuery::DisableQuery(PerfQueryGroup type)
// DisableQuery should be called for each EnableQuery, so subtract one to get the previous one.
u32 index = (m_query_read_pos + m_query_count - 1) % PERF_QUERY_BUFFER_SIZE;
vkCmdEndQuery(g_command_buffer_mgr->GetCurrentCommandBuffer(), m_query_pool, index);
StateTracker::GetInstance()->SetBackgroundCommandBufferExecution(true);
DEBUG_LOG(VIDEO, "end query %u", index);
}
}

@@ -198,40 +188,42 @@ bool PerfQuery::CreateReadbackBuffer()
return true;
}

// Records commands that copy the results of `query_count` queries, starting at `start_index`,
// from the query pool into the readback buffer, resets those queries, and then tags each
// matching m_query_buffer entry with the fence that must complete before the data is read.
// NOTE(review): this span carries two signatures and paired variants of several statements
// (one set taking an explicit command_buffer/fence, one going through g_command_buffer_mgr).
// It looks like both sides of a change are interleaved here - confirm which variant is live.
void PerfQuery::QueueCopyQueryResults(VkCommandBuffer command_buffer, VkFence fence,
u32 start_index, u32 query_count)
void PerfQuery::QueueCopyQueryResults(u32 start_index, u32 query_count)
{
DEBUG_LOG(VIDEO, "queue copy of queries %u-%u", start_index, start_index + query_count - 1);

// Transition buffer for GPU write
// TODO: Is this needed?
m_readback_buffer->PrepareForGPUWrite(command_buffer, VK_ACCESS_TRANSFER_WRITE_BIT,
m_readback_buffer->PrepareForGPUWrite(g_command_buffer_mgr->GetCurrentCommandBuffer(),
VK_ACCESS_TRANSFER_WRITE_BIT,
VK_PIPELINE_STAGE_TRANSFER_BIT);

// Copy from queries -> buffer
// Each result is written at byte offset start_index * sizeof(PerfQueryDataType) with a stride
// of sizeof(PerfQueryDataType), so a query's slot in the buffer matches its index in the pool.
vkCmdCopyQueryPoolResults(command_buffer, m_query_pool, start_index, query_count,
m_readback_buffer->GetBuffer(), start_index * sizeof(PerfQueryDataType),
sizeof(PerfQueryDataType), VK_QUERY_RESULT_WAIT_BIT);
vkCmdCopyQueryPoolResults(g_command_buffer_mgr->GetCurrentCommandBuffer(), m_query_pool,
start_index, query_count, m_readback_buffer->GetBuffer(),
start_index * sizeof(PerfQueryDataType), sizeof(PerfQueryDataType),
VK_QUERY_RESULT_WAIT_BIT);

// Prepare for host readback
m_readback_buffer->FlushGPUCache(command_buffer, VK_ACCESS_TRANSFER_WRITE_BIT,
VK_PIPELINE_STAGE_TRANSFER_BIT);
m_readback_buffer->FlushGPUCache(g_command_buffer_mgr->GetCurrentCommandBuffer(),
VK_ACCESS_TRANSFER_WRITE_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT);

// Reset queries so they're ready to use again
vkCmdResetQueryPool(command_buffer, m_query_pool, start_index, query_count);
vkCmdResetQueryPool(g_command_buffer_mgr->GetCurrentCommandBuffer(), m_query_pool, start_index,
query_count);

// Flag all queries as available, but with a fence that has to be completed first
// Entries are also marked inactive here, once their copy has been queued.
for (u32 i = 0; i < query_count; i++)
{
u32 index = start_index + i;
ActiveQuery& entry = m_query_buffer[index];
entry.pending_fence = fence;
entry.pending_fence = g_command_buffer_mgr->GetCurrentCommandBufferFence();
entry.available = true;
entry.active = false;
}
}

void PerfQuery::OnCommandBufferQueued(VkCommandBuffer command_buffer, VkFence fence)
void PerfQuery::FlushQueries()
{
// Flag all pending queries that aren't available as available after execution.
u32 copy_start_index = 0;
@@ -254,7 +246,7 @@ void PerfQuery::OnCommandBufferQueued(VkCommandBuffer command_buffer, VkFence fe
ASSERT(entry.active);
if (index < copy_start_index)
{
QueueCopyQueryResults(command_buffer, fence, copy_start_index, copy_count);
QueueCopyQueryResults(copy_start_index, copy_count);
copy_start_index = index;
copy_count = 0;
}
@@ -266,10 +258,10 @@ void PerfQuery::OnCommandBufferQueued(VkCommandBuffer command_buffer, VkFence fe
}

if (copy_count > 0)
QueueCopyQueryResults(command_buffer, fence, copy_start_index, copy_count);
QueueCopyQueryResults(copy_start_index, copy_count);
}

void PerfQuery::OnCommandBufferExecuted(VkFence fence)
void PerfQuery::OnFenceSignaled(VkFence fence)
{
// Need to save these since ProcessResults will modify them.
u32 query_read_pos = m_query_read_pos;
@@ -350,7 +342,7 @@ void PerfQuery::NonBlockingPartialFlush()
// Submit a command buffer in the background if the front query is not bound to one.
// Ideally this will complete before the buffer fills.
if (m_query_buffer[m_query_read_pos].pending_fence == VK_NULL_HANDLE)
Util::ExecuteCurrentCommandsAndRestoreState(true, false);
Renderer::GetInstance()->ExecuteCommandBuffer(true, false);
}

void PerfQuery::BlockingPartialFlush()
@@ -364,7 +356,7 @@ void PerfQuery::BlockingPartialFlush()
{
// This is expected to invoke FlushQueries (formerly OnCommandBufferQueued), which sets the fence
// on the entry. We wait for completion, which should also invoke OnFenceSignaled (formerly
// OnCommandBufferExecuted) and clear the fence.
Util::ExecuteCurrentCommandsAndRestoreState(false, true);
Renderer::GetInstance()->ExecuteCommandBuffer(false, true);
}
else
{
@@ -373,4 +365,4 @@ void PerfQuery::BlockingPartialFlush()
g_command_buffer_mgr->WaitForFence(entry.pending_fence);
}
}
}
} // namespace Vulkan
@@ -24,6 +24,7 @@ class PerfQuery : public PerfQueryBase
static PerfQuery* GetInstance();

bool Initialize();
void FlushQueries();

void EnableQuery(PerfQueryGroup type) override;
void DisableQuery(PerfQueryGroup type) override;
@@ -43,12 +44,11 @@ class PerfQuery : public PerfQueryBase

bool CreateQueryPool();
bool CreateReadbackBuffer();
void QueueCopyQueryResults(VkCommandBuffer command_buffer, VkFence fence, u32 start_index,
u32 query_count);
void QueueCopyQueryResults(u32 start_index, u32 query_count);
void ProcessResults(u32 start_index, u32 query_count);

void OnCommandBufferQueued(VkCommandBuffer command_buffer, VkFence fence);
void OnCommandBufferExecuted(VkFence fence);
void OnFenceSignaled(VkFence fence);

void NonBlockingPartialFlush();
void BlockingPartialFlush();