Skip to content

Commit

Permalink
Disable the new culling on RISC-V for now.
Browse files Browse the repository at this point in the history
  • Loading branch information
hrydgard committed Dec 9, 2023
1 parent 4e2a1bf commit 7e85d3d
Show file tree
Hide file tree
Showing 4 changed files with 17 additions and 11 deletions.
2 changes: 1 addition & 1 deletion Common/Math/CrossSIMD.h
Expand Up @@ -10,7 +10,7 @@

#include <cstdint>

#ifdef _M_SSE
#if PPSSPP_ARCH(SSE2)
#include <emmintrin.h>
#endif

Expand Down
15 changes: 5 additions & 10 deletions GPU/Common/DrawEngineCommon.cpp
Expand Up @@ -433,12 +433,12 @@ bool DrawEngineCommon::TestBoundingBoxFast(const void *vdata, int vertexCount, u
break;
case GE_VTYPE_POS_16BIT:
{
#if defined(_M_SSE)
#if PPSSPP_ARCH(SSE2)
__m128 scaleFactor = _mm_set1_ps(1.0f / 32768.0f);
for (int i = 0; i < vertexCount; i++) {
const s16 *data = ((const s16 *)((const s8 *)vdata + i * stride + offset));
__m128i bits = _mm_castpd_si128(_mm_load_sd((const double *)data));
// Sign extension. Ugly without SSE4.
// Sign extension. Hacky without SSE4.
bits = _mm_srai_epi32(_mm_unpacklo_epi16(bits, bits), 16);
__m128 pos = _mm_mul_ps(_mm_cvtepi32_ps(bits), scaleFactor);
_mm_storeu_ps(verts + i * 3, pos); // TODO: use stride 4 to avoid clashing writes?
Expand Down Expand Up @@ -470,11 +470,7 @@ bool DrawEngineCommon::TestBoundingBoxFast(const void *vdata, int vertexCount, u
// We only check the 4 sides. Near/far won't likely make a huge difference.
// We test one vertex against 4 planes to get some SIMD. Vertices need to be transformed to world space
// for testing, don't want to re-do that, so we have to use that "pivot" of the data.
<<<<<<< HEAD
#ifdef _M_SSE
=======
#if PPSSPP_ARCH(SSE2)
>>>>>>> c5a94c3799 (Buildfix again)
const __m128 worldX = _mm_loadu_ps(gstate.worldMatrix);
const __m128 worldY = _mm_loadu_ps(gstate.worldMatrix + 3);
const __m128 worldZ = _mm_loadu_ps(gstate.worldMatrix + 6);
Expand All @@ -498,9 +494,9 @@ bool DrawEngineCommon::TestBoundingBoxFast(const void *vdata, int vertexCount, u
);
// OK, now we check it against the four planes.
// This is really curiously similar to a matrix multiplication (well, it is one).
__m128 posX = _mm_shuffle_ps(worldpos, worldpos, 0);
__m128 posY = _mm_shuffle_ps(worldpos, worldpos, 1 | (1 << 2) | (1 << 4) | (1 << 6));
__m128 posZ = _mm_shuffle_ps(worldpos, worldpos, 2 | (2 << 2) | (2 << 4) | (2 << 6));
__m128 posX = _mm_shuffle_ps(worldpos, worldpos, _MM_SHUFFLE(0, 0, 0, 0));
__m128 posY = _mm_shuffle_ps(worldpos, worldpos, _MM_SHUFFLE(1, 1, 1, 1));
__m128 posZ = _mm_shuffle_ps(worldpos, worldpos, _MM_SHUFFLE(2, 2, 2, 2));
__m128 planeDist = _mm_add_ps(
_mm_add_ps(
_mm_mul_ps(planeX, posX),
Expand Down Expand Up @@ -566,7 +562,6 @@ bool DrawEngineCommon::TestBoundingBoxFast(const void *vdata, int vertexCount, u
}
}
#endif

return true;
}

Expand Down
9 changes: 9 additions & 0 deletions GPU/GPUCommonHW.cpp
Expand Up @@ -992,8 +992,17 @@ void GPUCommonHW::Execute_Prim(u32 op, u32 diff) {

// Vertex-count threshold used by PASSES_CULLING below: once `count` is greater
// than this, the macro evaluates to true.
#define MAX_CULL_CHECK_COUNT 6

// For now, turn off culling on platforms where we don't have SIMD bounding box tests, like RISC-V.
// Only builds with ARM NEON or SSE2 get the conditional form of PASSES_CULLING.
#if PPSSPP_ARCH(ARM_NEON) || PPSSPP_ARCH(SSE2)

// PASSES_CULLING expands to an expression that reads `vertexType` and `count`,
// so both must be in scope wherever it is used. It is true when `vertexType`
// has any bit set under the GE_VTYPE_THROUGH / MORPHCOUNT / WEIGHT / IDX masks,
// or when `count` is greater than MAX_CULL_CHECK_COUNT.
// NOTE(review): presumably a true value means the draw is accepted without
// running the bounding box test - confirm against the code that consumes it.
#define PASSES_CULLING ((vertexType & (GE_VTYPE_THROUGH_MASK | GE_VTYPE_MORPHCOUNT_MASK | GE_VTYPE_WEIGHT_MASK | GE_VTYPE_IDX_MASK)) || count > MAX_CULL_CHECK_COUNT)

#else

// Neither NEON nor SSE2 is available: PASSES_CULLING is unconditionally true.
#define PASSES_CULLING true

#endif

// If certain conditions are true, do frustum culling.
bool passCulling = PASSES_CULLING;
if (!passCulling) {
Expand Down
2 changes: 2 additions & 0 deletions ppsspp_config.h
Expand Up @@ -11,6 +11,7 @@
#if defined(_M_IX86) || defined(__i386__) || defined (__EMSCRIPTEN__)
#define PPSSPP_ARCH_X86 1
#define PPSSPP_ARCH_32BIT 1
#define PPSSPP_ARCH_SSE2 1
//TODO: Remove this compat define
#ifndef _M_IX86
#define _M_IX86 600
Expand All @@ -19,6 +20,7 @@

#if (defined(_M_X64) || defined(__amd64__) || defined(__x86_64__)) && !defined(__EMSCRIPTEN__)
#define PPSSPP_ARCH_AMD64 1
#define PPSSPP_ARCH_SSE2 1
#if defined(__ILP32__)
#define PPSSPP_ARCH_32BIT 1
#else
Expand Down

0 comments on commit 7e85d3d

Please sign in to comment.