-
Notifications
You must be signed in to change notification settings - Fork 2.6k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
VideoCommon: Add class for quickly transforming and culling vertices …
…on the CPU
- Loading branch information
1 parent
421af09
commit f87547b
Showing
9 changed files
with
994 additions
and
83 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,158 @@ | ||
// Copyright 2022 Dolphin Emulator Project | ||
// SPDX-License-Identifier: GPL-2.0-or-later | ||
|
||
#include "VideoCommon/CPUCull.h" | ||
|
||
#include "Common/Assert.h" | ||
#include "Common/CPUDetect.h" | ||
#include "Common/MemoryUtil.h" | ||
|
||
#include "VideoCommon/CPMemory.h" | ||
#include "VideoCommon/VertexManagerBase.h" | ||
#include "VideoCommon/VertexShaderManager.h" | ||
#include "VideoCommon/VideoConfig.h" | ||
#include "VideoCommon/XFMemory.h" | ||
|
||
#if defined(_M_X86) || defined(_M_X86_64) | ||
#define USE_SSE | ||
#elif defined(_M_ARM_64) | ||
#define USE_NEON | ||
#else | ||
#define NO_SIMD | ||
#endif | ||
|
||
#if defined(USE_SSE) | ||
#include <immintrin.h> | ||
#elif defined(USE_NEON) | ||
#include <arm_neon.h> | ||
#endif | ||
|
||
#include "VideoCommon/CPUCullImpl.h" | ||
#ifdef USE_SSE | ||
#define USE_SSE3 | ||
#include "VideoCommon/CPUCullImpl.h" | ||
#define USE_SSE41 | ||
#include "VideoCommon/CPUCullImpl.h" | ||
#define USE_AVX | ||
#include "VideoCommon/CPUCullImpl.h" | ||
#define USE_FMA | ||
#include "VideoCommon/CPUCullImpl.h" | ||
#endif | ||
|
||
#if defined(USE_SSE) | ||
#if defined(__AVX__) && defined(__FMA__) | ||
static constexpr int MIN_SSE = 51; | ||
#elif defined(__AVX__) | ||
static constexpr int MIN_SSE = 50; | ||
#elif defined(__SSE4_1__) | ||
static constexpr int MIN_SSE = 41; | ||
#elif defined(__SSE3__) | ||
static constexpr int MIN_SSE = 30; | ||
#else | ||
static constexpr int MIN_SSE = 0; | ||
#endif | ||
#endif | ||
|
||
template <bool PositionHas3Elems, bool PerVertexPosMtx> | ||
static CPUCull::TransformFunction GetTransformFunction() | ||
{ | ||
#if defined(USE_SSE) | ||
if (MIN_SSE >= 51 || (cpu_info.bAVX && cpu_info.bFMA)) | ||
return CPUCull_FMA::TransformVertices<PositionHas3Elems, PerVertexPosMtx>; | ||
else if (MIN_SSE >= 50 || cpu_info.bAVX) | ||
return CPUCull_AVX::TransformVertices<PositionHas3Elems, PerVertexPosMtx>; | ||
else if (PositionHas3Elems && PerVertexPosMtx && (MIN_SSE >= 41 || cpu_info.bSSE4_1)) | ||
return CPUCull_SSE41::TransformVertices<PositionHas3Elems, PerVertexPosMtx>; | ||
else if (PositionHas3Elems && (MIN_SSE >= 30 || cpu_info.bSSE3)) | ||
return CPUCull_SSE3::TransformVertices<PositionHas3Elems, PerVertexPosMtx>; | ||
else | ||
return CPUCull_SSE::TransformVertices<PositionHas3Elems, PerVertexPosMtx>; | ||
#elif defined(USE_NEON) | ||
return CPUCull_NEON::TransformVertices<PositionHas3Elems, PerVertexPosMtx>; | ||
#else | ||
return CPUCull_Scalar::TransformVertices<PositionHas3Elems, PerVertexPosMtx>; | ||
#endif | ||
} | ||
|
||
template <OpcodeDecoder::Primitive Primitive, CullMode Mode> | ||
static CPUCull::CullFunction GetCullFunction0() | ||
{ | ||
#if defined(USE_SSE) | ||
if (MIN_SSE >= 50 || cpu_info.bAVX) | ||
return CPUCull_AVX::CullVertices<Primitive, Mode>; | ||
else if (MIN_SSE >= 30 || cpu_info.bSSE3) | ||
return CPUCull_SSE3::CullVertices<Primitive, Mode>; | ||
else | ||
return CPUCull_SSE::CullVertices<Primitive, Mode>; | ||
#elif defined(USE_NEON) | ||
return CPUCull_NEON::CullVertices<Primitive, Mode>; | ||
#else | ||
return CPUCull_Scalar::CullVertices<Primitive, Mode>; | ||
#endif | ||
} | ||
|
||
template <OpcodeDecoder::Primitive Primitive> | ||
static Common::EnumMap<CPUCull::CullFunction, CullMode::All> GetCullFunction1() | ||
{ | ||
return { | ||
GetCullFunction0<Primitive, CullMode::None>(), | ||
GetCullFunction0<Primitive, CullMode::Back>(), | ||
GetCullFunction0<Primitive, CullMode::Front>(), | ||
GetCullFunction0<Primitive, CullMode::All>(), | ||
}; | ||
} | ||
|
||
CPUCull::~CPUCull() = default; | ||
|
||
void CPUCull::Init() | ||
{ | ||
m_transform_table[false][false] = GetTransformFunction<false, false>(); | ||
m_transform_table[false][true] = GetTransformFunction<false, true>(); | ||
m_transform_table[true][false] = GetTransformFunction<true, false>(); | ||
m_transform_table[true][true] = GetTransformFunction<true, true>(); | ||
using Prim = OpcodeDecoder::Primitive; | ||
m_cull_table[Prim::GX_DRAW_QUADS] = GetCullFunction1<Prim::GX_DRAW_QUADS>(); | ||
m_cull_table[Prim::GX_DRAW_QUADS_2] = GetCullFunction1<Prim::GX_DRAW_QUADS>(); | ||
m_cull_table[Prim::GX_DRAW_TRIANGLES] = GetCullFunction1<Prim::GX_DRAW_TRIANGLES>(); | ||
m_cull_table[Prim::GX_DRAW_TRIANGLE_STRIP] = GetCullFunction1<Prim::GX_DRAW_TRIANGLE_STRIP>(); | ||
m_cull_table[Prim::GX_DRAW_TRIANGLE_FAN] = GetCullFunction1<Prim::GX_DRAW_TRIANGLE_FAN>(); | ||
} | ||
|
||
bool CPUCull::CullVertices(VertexLoaderBase* loader, OpcodeDecoder::Primitive primitive, | ||
const u8* src, u32 count) | ||
{ | ||
ASSERT_MSG(VIDEO, primitive < OpcodeDecoder::Primitive::GX_DRAW_LINES, | ||
"CPUCull should not be called on lines or points"); | ||
const u32 stride = loader->m_native_vtx_decl.stride; | ||
const bool posHas3Elems = loader->m_native_vtx_decl.position.components >= 3; | ||
const bool perVertexPosMtx = loader->m_native_vtx_decl.posmtx.enable; | ||
if (m_transform_buffer_size < count) | ||
{ | ||
u32 new_size = std::max<u32>(m_transform_buffer_size, 64); | ||
while (new_size < count) | ||
new_size <<= 1; | ||
m_transform_buffer_size = new_size; | ||
m_transform_buffer.reset(static_cast<TransformedVertex*>( | ||
Common::AllocateAlignedMemory(new_size * sizeof(TransformedVertex), 32))); | ||
} | ||
|
||
// transform functions need the projection matrix to tranform to clip space | ||
VertexShaderManager::SetProjectionMatrix(); | ||
|
||
static constexpr Common::EnumMap<CullMode, CullMode::All> cullmode_invert = { | ||
CullMode::None, CullMode::Front, CullMode::Back, CullMode::All}; | ||
|
||
CullMode cullmode = bpmem.genMode.cullmode; | ||
if (xfmem.viewport.ht > 0) // See videosoftware Clipper.cpp:IsBackface | ||
cullmode = cullmode_invert[cullmode]; | ||
const TransformFunction transform = m_transform_table[posHas3Elems][perVertexPosMtx]; | ||
transform(m_transform_buffer.get(), src, stride, count); | ||
const CullFunction cull = m_cull_table[primitive][cullmode]; | ||
return cull(m_transform_buffer.get(), count); | ||
} | ||
|
||
template <typename T> | ||
void CPUCull::BufferDeleter<T>::operator()(T* ptr) | ||
{ | ||
Common::FreeAlignedMemory(ptr); | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,38 @@ | ||
// Copyright 2022 Dolphin Emulator Project | ||
// SPDX-License-Identifier: GPL-2.0-or-later | ||
|
||
#pragma once | ||
|
||
#include "VideoCommon/BPMemory.h" | ||
#include "VideoCommon/DataReader.h" | ||
#include "VideoCommon/OpcodeDecoding.h" | ||
|
||
class CPUCull | ||
{ | ||
public: | ||
~CPUCull(); | ||
void Init(); | ||
bool CullVertices(VertexLoaderBase* loader, OpcodeDecoder::Primitive primitive, const u8* src, | ||
u32 count); | ||
|
||
struct alignas(16) TransformedVertex | ||
{ | ||
float x, y, z, w; | ||
}; | ||
|
||
using TransformFunction = void (*)(void*, const void*, u32, int); | ||
using CullFunction = bool (*)(const CPUCull::TransformedVertex*, int); | ||
|
||
private: | ||
template <typename T> | ||
struct BufferDeleter | ||
{ | ||
void operator()(T* ptr); | ||
}; | ||
std::unique_ptr<TransformedVertex[], BufferDeleter<TransformedVertex>> m_transform_buffer; | ||
u32 m_transform_buffer_size = 0; | ||
std::array<std::array<TransformFunction, 2>, 2> m_transform_table; | ||
Common::EnumMap<Common::EnumMap<CullFunction, CullMode::All>, | ||
OpcodeDecoder::Primitive::GX_DRAW_TRIANGLE_FAN> | ||
m_cull_table; | ||
}; |
Oops, something went wrong.