@@ -2,113 +2,64 @@
// Licensed under GPLv2+
// Refer to the license.txt file included.

#include "VideoCommon/IndexGenerator.h"

#include <array>
#include <cstddef>
#include <cstring>

#include "Common/CommonTypes.h"
#include "Common/Compiler.h"
#include "Common/Logging/Log.h"
#include "VideoCommon/IndexGenerator.h"
#include "VideoCommon/OpcodeDecoding.h"
#include "VideoCommon/VideoConfig.h"

// Init
// Storage for IndexGenerator's static members: the current write position in the
// index buffer, the buffer start handed to Start(), and the running vertex count.
u16* IndexGenerator::index_buffer_current;
u16* IndexGenerator::BASEIptr;
u32 IndexGenerator::base_index;

// Index value reserved as the primitive restart marker.
static const u16 s_primitive_restart = UINT16_MAX;

// Dispatch table indexed by primitive type. Each entry takes (write pointer,
// vertex count, first vertex index) and returns the advanced write pointer.
static u16* (*primitive_table[8])(u16*, u32, u32);

// Fills primitive_table with one index-writing routine per GX primitive type.
// Triangle-based primitives (quads, triangles, strips, fans) use the <true>
// instantiations when the backend reports primitive restart support and the <false>
// instantiations otherwise; lines and points use the same routine in both cases.
// NOTE(review): the `namespace` token directly after the signature looks like it
// belongs to a different revision of this file — confirm before building.
void IndexGenerator::Init()
namespace
{
if (g_Config.backend_info.bSupportsPrimitiveRestart)
{
primitive_table[OpcodeDecoder::GX_DRAW_QUADS] = AddQuads<true>;
primitive_table[OpcodeDecoder::GX_DRAW_QUADS_2] = AddQuads_nonstandard<true>;
primitive_table[OpcodeDecoder::GX_DRAW_TRIANGLES] = AddList<true>;
primitive_table[OpcodeDecoder::GX_DRAW_TRIANGLE_STRIP] = AddStrip<true>;
primitive_table[OpcodeDecoder::GX_DRAW_TRIANGLE_FAN] = AddFan<true>;
}
else
{
primitive_table[OpcodeDecoder::GX_DRAW_QUADS] = AddQuads<false>;
primitive_table[OpcodeDecoder::GX_DRAW_QUADS_2] = AddQuads_nonstandard<false>;
primitive_table[OpcodeDecoder::GX_DRAW_TRIANGLES] = AddList<false>;
primitive_table[OpcodeDecoder::GX_DRAW_TRIANGLE_STRIP] = AddStrip<false>;
primitive_table[OpcodeDecoder::GX_DRAW_TRIANGLE_FAN] = AddFan<false>;
}
// Line and point primitives have a single (non-templated) implementation.
primitive_table[OpcodeDecoder::GX_DRAW_LINES] = &AddLineList;
primitive_table[OpcodeDecoder::GX_DRAW_LINE_STRIP] = &AddLineStrip;
primitive_table[OpcodeDecoder::GX_DRAW_POINTS] = &AddPoints;
}

// Begins a new batch: points both the write cursor and the recorded buffer start at
// Indexptr and resets the running vertex count to zero.
void IndexGenerator::Start(u16* Indexptr)
{
index_buffer_current = Indexptr;
BASEIptr = Indexptr;
base_index = 0;
}
// Index value reserved as the primitive restart marker (largest 16-bit index).
constexpr u16 s_primitive_restart = UINT16_MAX;

void IndexGenerator::AddIndices(int primitive, u32 numVerts)
{
index_buffer_current = primitive_table[primitive](index_buffer_current, numVerts, base_index);
base_index += numVerts;
}

// Copies `num_indices` caller-provided indices verbatim into the index buffer and
// accounts for the `num_vertices` vertices they refer to.
void IndexGenerator::AddExternalIndices(const u16* indices, u32 num_indices, u32 num_vertices)
{
  const std::size_t byte_count = sizeof(u16) * num_indices;
  std::memcpy(index_buffer_current, indices, byte_count);

  // The two counters are independent, so the update order does not matter.
  base_index += num_vertices;
  index_buffer_current += num_indices;
}

// Triangles
template <bool pr>
DOLPHIN_FORCE_INLINE u16* IndexGenerator::WriteTriangle(u16* Iptr, u32 index1, u32 index2,
u32 index3)
u16* WriteTriangle(u16* index_ptr, u32 index1, u32 index2, u32 index3)
{
*Iptr++ = index1;
*Iptr++ = index2;
*Iptr++ = index3;
if (pr)
*Iptr++ = s_primitive_restart;
return Iptr;
*index_ptr++ = index1;
*index_ptr++ = index2;
*index_ptr++ = index3;
if constexpr (pr)
*index_ptr++ = s_primitive_restart;
return index_ptr;
}

// Emits indices for a triangle list: one triangle per three consecutive vertices,
// starting at vertex `index`. Trailing vertices that do not form a full triangle are
// ignored. Returns the advanced write pointer.
template <bool pr>
u16* AddList(u16* index_ptr, u32 num_verts, u32 index)
{
  // `i` is the offset of the last vertex of each triangle.
  for (u32 i = 2; i < num_verts; i += 3)
  {
    index_ptr = WriteTriangle<pr>(index_ptr, index + i - 2, index + i - 1, index + i);
  }
  return index_ptr;
}

// Emits indices for a triangle strip starting at vertex `index`.
//
// With primitive restart (`pr` == true) the strip is written as-is: `num_verts`
// sequential indices followed by one restart marker (the marker is written even when
// num_verts is 0). Without it, the strip is expanded into individual triangles, one
// per vertex from the third onwards, swapping the last two corners on every other
// triangle so the winding stays consistent. Returns the advanced write pointer.
template <bool pr>
u16* AddStrip(u16* index_ptr, u32 num_verts, u32 index)
{
  if constexpr (pr)
  {
    for (u32 i = 0; i < num_verts; ++i)
    {
      *index_ptr++ = index + i;
    }
    *index_ptr++ = s_primitive_restart;
  }
  else
  {
    bool wind = false;
    for (u32 i = 2; i < num_verts; ++i)
    {
      // `wind` toggles between (i-2, i-1, i) and (i-2, i, i-1).
      index_ptr = WriteTriangle<pr>(index_ptr, index + i - 2, index + i - !wind, index + i - wind);

      wind ^= true;
    }
  }
  return index_ptr;
}

/**
@@ -131,37 +82,37 @@ u16* IndexGenerator::AddStrip(u16* Iptr, u32 const numVerts, u32 index)
*/

// Emits indices for a triangle fan whose hub is vertex `index`.
//
// With primitive restart (`pr` == true) the fan is packed into short strips: first in
// groups of three fan triangles (five indices + restart marker), then in groups of
// two (four indices + restart marker). Any triangles still left over, and every
// triangle when `pr` is false, are written one at a time through WriteTriangle.
// Returns the advanced write pointer.
template <bool pr>
u16* AddFan(u16* index_ptr, u32 num_verts, u32 index)
{
  // `i` is the offset of the next rim vertex to consume; it carries over between loops.
  u32 i = 2;

  if constexpr (pr)
  {
    // Three fan triangles encoded as one 5-index strip.
    for (; i + 3 <= num_verts; i += 3)
    {
      *index_ptr++ = index + i - 1;
      *index_ptr++ = index + i + 0;
      *index_ptr++ = index;
      *index_ptr++ = index + i + 1;
      *index_ptr++ = index + i + 2;
      *index_ptr++ = s_primitive_restart;
    }

    // Two fan triangles encoded as one 4-index strip.
    for (; i + 2 <= num_verts; i += 2)
    {
      *index_ptr++ = index + i - 1;
      *index_ptr++ = index + i + 0;
      *index_ptr++ = index;
      *index_ptr++ = index + i + 1;
      *index_ptr++ = s_primitive_restart;
    }
  }

  // Remaining triangles (all of them when primitive restart is unavailable).
  for (; i < num_verts; ++i)
  {
    index_ptr = WriteTriangle<pr>(index_ptr, index, index + i - 1, index + i);
  }
  return index_ptr;
}

/*
@@ -182,77 +133,122 @@ u16* IndexGenerator::AddFan(u16* Iptr, u32 numVerts, u32 index)
* ZWW do this for sun rays
*/
// Emits indices for a quad list starting at vertex `index`.
//
// Each group of four vertices becomes either a 4-index strip followed by a restart
// marker (`pr` == true) or two triangles sharing the quad's first vertex
// (`pr` == false). If exactly three vertices are left over after the last full quad,
// they are drawn as a single triangle. Returns the advanced write pointer.
template <bool pr>
u16* AddQuads(u16* index_ptr, u32 num_verts, u32 index)
{
  // `i` is the offset of the last vertex of the current quad.
  u32 i = 3;
  for (; i < num_verts; i += 4)
  {
    if constexpr (pr)
    {
      *index_ptr++ = index + i - 2;
      *index_ptr++ = index + i - 1;
      *index_ptr++ = index + i - 3;
      *index_ptr++ = index + i - 0;
      *index_ptr++ = s_primitive_restart;
    }
    else
    {
      index_ptr = WriteTriangle<pr>(index_ptr, index + i - 3, index + i - 2, index + i - 1);
      index_ptr = WriteTriangle<pr>(index_ptr, index + i - 3, index + i - 1, index + i - 0);
    }
  }

  // three vertices remaining, so render a triangle
  if (i == num_verts)
  {
    index_ptr = WriteTriangle<pr>(index_ptr, index + num_verts - 3, index + num_verts - 2,
                                  index + num_verts - 1);
  }
  return index_ptr;
}

// Handler for the non-standard quad draw command: logs a warning and then generates
// exactly the same indices as AddQuads. Returns the advanced write pointer.
template <bool pr>
u16* AddQuads_nonstandard(u16* index_ptr, u32 num_verts, u32 index)
{
  WARN_LOG(VIDEO, "Non-standard primitive drawing command GL_DRAW_QUADS_2");
  return AddQuads<pr>(index_ptr, num_verts, index);
}

// Lines

// Emits indices for a line list: one line per pair of consecutive vertices, starting
// at vertex `index`. An unpaired trailing vertex is ignored. Returns the advanced
// write pointer.
u16* AddLineList(u16* index_ptr, u32 num_verts, u32 index)
{
  // `i` is the offset of the second endpoint of each line.
  for (u32 i = 1; i < num_verts; i += 2)
  {
    *index_ptr++ = index + i - 1;
    *index_ptr++ = index + i;
  }
  return index_ptr;
}

// Line strips are not emitted as strips: line lists are much more common, so each
// strip is converted into a list by writing every consecutive vertex pair
// (i - 1, i) as its own line. Returns the advanced write pointer.
u16* AddLineStrip(u16* index_ptr, u32 num_verts, u32 index)
{
  for (u32 i = 1; i < num_verts; ++i)
  {
    *index_ptr++ = index + i - 1;
    *index_ptr++ = index + i;
  }
  return index_ptr;
}

// Points

// Emits one index per vertex, in order, starting at vertex `index`. Returns the
// advanced write pointer.
u16* AddPoints(u16* index_ptr, u32 num_verts, u32 index)
{
  for (u32 i = 0; i != num_verts; ++i)
  {
    *index_ptr++ = index + i;
  }
  return index_ptr;
}
} // Anonymous namespace

// Fills m_primitive_table with one index-writing routine per GX primitive type; all
// eight slots are assigned. Triangle-based primitives (quads, triangles, strips,
// fans) use the <true> instantiations when the backend reports primitive restart
// support and the <false> instantiations otherwise; lines and points use the same
// routine in both cases.
void IndexGenerator::Init()
{
  if (g_Config.backend_info.bSupportsPrimitiveRestart)
  {
    m_primitive_table[OpcodeDecoder::GX_DRAW_QUADS] = AddQuads<true>;
    m_primitive_table[OpcodeDecoder::GX_DRAW_QUADS_2] = AddQuads_nonstandard<true>;
    m_primitive_table[OpcodeDecoder::GX_DRAW_TRIANGLES] = AddList<true>;
    m_primitive_table[OpcodeDecoder::GX_DRAW_TRIANGLE_STRIP] = AddStrip<true>;
    m_primitive_table[OpcodeDecoder::GX_DRAW_TRIANGLE_FAN] = AddFan<true>;
  }
  else
  {
    m_primitive_table[OpcodeDecoder::GX_DRAW_QUADS] = AddQuads<false>;
    m_primitive_table[OpcodeDecoder::GX_DRAW_QUADS_2] = AddQuads_nonstandard<false>;
    m_primitive_table[OpcodeDecoder::GX_DRAW_TRIANGLES] = AddList<false>;
    m_primitive_table[OpcodeDecoder::GX_DRAW_TRIANGLE_STRIP] = AddStrip<false>;
    m_primitive_table[OpcodeDecoder::GX_DRAW_TRIANGLE_FAN] = AddFan<false>;
  }
  // Line and point primitives have a single (non-templated) implementation.
  m_primitive_table[OpcodeDecoder::GX_DRAW_LINES] = AddLineList;
  m_primitive_table[OpcodeDecoder::GX_DRAW_LINE_STRIP] = AddLineStrip;
  m_primitive_table[OpcodeDecoder::GX_DRAW_POINTS] = AddPoints;
}

u32 IndexGenerator::GetRemainingIndices()
void IndexGenerator::Start(u16* index_ptr)
{
u32 max_index = 65534; // -1 is reserved for primitive restart (ogl + dx11)
return max_index - base_index;
m_index_buffer_current = index_ptr;
m_base_index_ptr = index_ptr;
m_base_index = 0;
}

void IndexGenerator::AddIndices(int primitive, u32 num_vertices)
{
m_index_buffer_current =
m_primitive_table[primitive](m_index_buffer_current, num_vertices, m_base_index);
m_base_index += num_vertices;
}

// Copies `num_indices` caller-provided indices verbatim into the index buffer and
// accounts for the `num_vertices` vertices they refer to.
void IndexGenerator::AddExternalIndices(const u16* indices, u32 num_indices, u32 num_vertices)
{
  const std::size_t byte_count = sizeof(u16) * num_indices;
  std::memcpy(m_index_buffer_current, indices, byte_count);

  // The two counters are independent, so the update order does not matter.
  m_base_index += num_vertices;
  m_index_buffer_current += num_indices;
}

// Returns how many more vertices can be indexed in the current batch before the
// 16-bit index range is exhausted. Returns 0 once the limit has been reached or
// passed.
u32 IndexGenerator::GetRemainingIndices() const
{
  // -1 is reserved for primitive restart (OGL + DX11)
  constexpr u32 max_index = 65534;

  // Saturate instead of letting the unsigned subtraction wrap around: a wrapped
  // result would look like a huge amount of free index space to callers that
  // compare a vertex count against this value.
  if (m_base_index >= max_index)
    return 0;

  return max_index - m_base_index;
}
@@ -7,48 +7,29 @@

#pragma once

#include <array>
#include "Common/CommonTypes.h"

// Generates 16-bit index data for GX primitives into a caller-supplied buffer.
// Each instance tracks its own write position and running vertex count.
class IndexGenerator
{
public:
  // Fills the primitive dispatch table; must run before AddIndices() is used.
  void Init();

  // Begins a new batch writing at index_ptr and resets the vertex count to zero.
  void Start(u16* index_ptr);

  // Appends indices for num_vertices vertices of the given primitive type.
  void AddIndices(int primitive, u32 num_vertices);

  // Copies num_indices pre-built indices into the buffer and adds num_vertices to the
  // running vertex count.
  void AddExternalIndices(const u16* indices, u32 num_indices, u32 num_vertices);

  // Number of vertices accounted for since the last Start().
  u32 GetNumVerts() const { return m_base_index; }

  // Number of indices written since the last Start().
  u32 GetIndexLen() const { return static_cast<u32>(m_index_buffer_current - m_base_index_ptr); }

  // Number of vertices that can still be indexed in the current batch.
  u32 GetRemainingIndices() const;

private:
  // Next position to write to in the index buffer.
  u16* m_index_buffer_current = nullptr;
  // Start of the index buffer, as passed to Start().
  u16* m_base_index_ptr = nullptr;
  // Running vertex count; used as the first vertex index of the next primitive.
  u32 m_base_index = 0;

  // Signature shared by all index writers: (write pointer, vertex count, first
  // vertex index) -> advanced write pointer.
  using PrimitiveFunction = u16* (*)(u16*, u32, u32);
  // One writer per primitive type, filled in by Init().
  std::array<PrimitiveFunction, 8> m_primitive_table{};
};
@@ -283,8 +283,7 @@ int RunVertices(int vtx_attr_group, int primitive, int count, DataReader src, bo

count = loader->RunVertices(src, dst, count);

IndexGenerator::AddIndices(primitive, count);

g_vertex_manager->AddIndices(primitive, count);
g_vertex_manager->FlushData(count, loader->m_native_vtx_decl.stride);

ADDSTAT(g_stats.this_frame.num_prims, count);
@@ -88,6 +88,7 @@ VertexManagerBase::~VertexManagerBase() = default;

// Base-class initialization: sets up the owned index generator's primitive dispatch
// table. Always returns true.
bool VertexManagerBase::Initialize()
{
m_index_generator.Init();
return true;
}

@@ -96,6 +97,11 @@ u32 VertexManagerBase::GetRemainingSize() const
return static_cast<u32>(m_end_buffer_pointer - m_cur_buffer_pointer);
}

// Forwards to the owned IndexGenerator, which appends indices for num_vertices
// vertices of the given primitive type to the current batch.
void VertexManagerBase::AddIndices(int primitive, u32 num_vertices)
{
m_index_generator.AddIndices(primitive, num_vertices);
}

DataReader VertexManagerBase::PrepareForAdditionalData(int primitive, u32 count, u32 stride,
bool cullall)
{
@@ -120,12 +126,12 @@ DataReader VertexManagerBase::PrepareForAdditionalData(int primitive, u32 count,

// Check for size in buffer, if the buffer gets full, call Flush()
if (!m_is_flushed &&
(count > IndexGenerator::GetRemainingIndices() || count > GetRemainingIndices(primitive) ||
(count > m_index_generator.GetRemainingIndices() || count > GetRemainingIndices(primitive) ||
needed_vertex_bytes > GetRemainingSize()))
{
Flush();

if (count > IndexGenerator::GetRemainingIndices())
if (count > m_index_generator.GetRemainingIndices())
ERROR_LOG(VIDEO, "Too little remaining index values. Use 32-bit or reset them on flush.");
if (count > GetRemainingIndices(primitive))
ERROR_LOG(VIDEO, "VertexManager: Buffer not large enough for all indices! "
@@ -145,7 +151,7 @@ DataReader VertexManagerBase::PrepareForAdditionalData(int primitive, u32 count,
// This buffer isn't getting sent to the GPU. Just allocate it on the cpu.
m_cur_buffer_pointer = m_base_buffer_pointer = m_cpu_vertex_buffer.data();
m_end_buffer_pointer = m_base_buffer_pointer + m_cpu_vertex_buffer.size();
IndexGenerator::Start(m_cpu_index_buffer.data());
m_index_generator.Start(m_cpu_index_buffer.data());
}
else
{
@@ -163,9 +169,9 @@ void VertexManagerBase::FlushData(u32 count, u32 stride)
m_cur_buffer_pointer += count * stride;
}

u32 VertexManagerBase::GetRemainingIndices(int primitive)
u32 VertexManagerBase::GetRemainingIndices(int primitive) const
{
u32 index_len = MAXIBUFFERSIZE - IndexGenerator::GetIndexLen();
const u32 index_len = MAXIBUFFERSIZE - m_index_generator.GetIndexLen();

if (g_Config.backend_info.bSupportsPrimitiveRestart)
{
@@ -234,7 +240,7 @@ void VertexManagerBase::ResetBuffer(u32 vertex_stride)
m_base_buffer_pointer = m_cpu_vertex_buffer.data();
m_cur_buffer_pointer = m_cpu_vertex_buffer.data();
m_end_buffer_pointer = m_base_buffer_pointer + m_cpu_vertex_buffer.size();
IndexGenerator::Start(m_cpu_index_buffer.data());
m_index_generator.Start(m_cpu_index_buffer.data());
}

void VertexManagerBase::CommitBuffer(u32 num_vertices, u32 vertex_stride, u32 num_indices,
@@ -288,7 +294,7 @@ void VertexManagerBase::UploadUtilityVertices(const void* vertices, u32 vertex_s
m_cur_buffer_pointer += copy_size;
}
if (indices)
IndexGenerator::AddExternalIndices(indices, num_indices, num_vertices);
m_index_generator.AddExternalIndices(indices, num_indices, num_vertices);

CommitBuffer(num_vertices, vertex_stride, num_indices, out_base_vertex, out_base_index);
}
@@ -413,9 +419,9 @@ void VertexManagerBase::Flush()
{
// Now the vertices can be flushed to the GPU. Everything following the CommitBuffer() call
// must be careful to not upload any utility vertices, as the binding will be lost otherwise.
const u32 num_indices = IndexGenerator::GetIndexLen();
const u32 num_indices = m_index_generator.GetIndexLen();
u32 base_vertex, base_index;
CommitBuffer(IndexGenerator::GetNumVerts(),
CommitBuffer(m_index_generator.GetNumVerts(),
VertexLoaderManager::GetCurrentVertexFormat()->GetVertexStride(), num_indices,
&base_vertex, &base_index);

@@ -9,6 +9,7 @@

#include "Common/CommonTypes.h"
#include "Common/MathUtil.h"
#include "VideoCommon/IndexGenerator.h"
#include "VideoCommon/RenderState.h"
#include "VideoCommon/ShaderCache.h"

@@ -65,6 +66,7 @@ class VertexManagerBase
virtual bool Initialize();

PrimitiveType GetCurrentPrimitiveType() const { return m_current_primitive_type; }
void AddIndices(int primitive, u32 num_vertices);
DataReader PrepareForAdditionalData(int primitive, u32 count, u32 stride, bool cullall);
void FlushData(u32 count, u32 stride);

@@ -134,7 +136,7 @@ class VertexManagerBase
virtual void DrawCurrentBatch(u32 base_index, u32 num_indices, u32 base_vertex);

u32 GetRemainingSize() const;
static u32 GetRemainingIndices(int primitive);
u32 GetRemainingIndices(int primitive) const;

void CalculateZSlope(NativeVertexFormat* format);
void LoadTextures();
@@ -159,6 +161,8 @@ class VertexManagerBase
bool m_blending_state_changed = true;
bool m_cull_all = false;

IndexGenerator m_index_generator;

private:
// Minimum number of draws per command buffer when attempting to preempt a readback operation.
static constexpr u32 MINIMUM_DRAW_CALLS_PER_COMMAND_BUFFER_FOR_READBACK = 10;
@@ -270,7 +270,6 @@ void VideoBackendBase::InitializeShared()
PixelEngine::Init();
BPInit();
VertexLoaderManager::Init();
IndexGenerator::Init();
VertexShaderManager::Init();
GeometryShaderManager::Init();
PixelShaderManager::Init();