From ee6ffac28eb82ee4f33212f468ea3849a78a24a5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Henrik=20Rydg=C3=A5rd?= Date: Wed, 1 Nov 2023 20:07:40 -0600 Subject: [PATCH 1/2] Ignore triangle strips with fewer than 3 vertices. Should fix the new issue reported in #18273 --- GPU/Common/IndexGenerator.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/GPU/Common/IndexGenerator.cpp b/GPU/Common/IndexGenerator.cpp index 679fdc259cfd..261cd184705a 100644 --- a/GPU/Common/IndexGenerator.cpp +++ b/GPU/Common/IndexGenerator.cpp @@ -115,6 +115,9 @@ alignas(16) static const uint16_t offsets_counter_clockwise[24] = { void IndexGenerator::AddStrip(int numVerts, int indexOffset, bool clockwise) { int numTris = numVerts - 2; + if (numTris <= 0) { + return; + } #ifdef _M_SSE // In an SSE2 register we can fit 8 16-bit integers. // However, we need to output a multiple of 3 indices. From 48a134835283da448a3bc705a317f45e458d2869 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Henrik=20Rydg=C3=A5rd?= Date: Wed, 1 Nov 2023 21:30:04 -0600 Subject: [PATCH 2/2] Move a var for clarity --- GPU/Common/IndexGenerator.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/GPU/Common/IndexGenerator.cpp b/GPU/Common/IndexGenerator.cpp index 261cd184705a..45a23d991a32 100644 --- a/GPU/Common/IndexGenerator.cpp +++ b/GPU/Common/IndexGenerator.cpp @@ -126,7 +126,6 @@ void IndexGenerator::AddStrip(int numVerts, int indexOffset, bool clockwise) { // We allow ourselves to write some extra indices to avoid the fallback loop. // That's alright as we're appending to a buffer - they will get overwritten anyway. - int numChunks = (numTris + 7) >> 3; __m128i ibase8 = _mm_set1_epi16(indexOffset); const __m128i *offsets = (const __m128i *)(clockwise ? 
offsets_clockwise : offsets_counter_clockwise); __m128i *dst = (__m128i *)inds_; @@ -140,6 +139,7 @@ void IndexGenerator::AddStrip(int numVerts, int indexOffset, bool clockwise) { __m128i offsets2 = _mm_add_epi16(ibase8, _mm_load_si128(offsets + 2)); _mm_storeu_si128(dst + 2, offsets2); __m128i increment = _mm_set1_epi16(8); + int numChunks = (numTris + 7) >> 3; for (int i = 1; i < numChunks; i++) { dst += 3; offsets0 = _mm_add_epi16(offsets0, increment); @@ -154,7 +154,6 @@ void IndexGenerator::AddStrip(int numVerts, int indexOffset, bool clockwise) { inds_ += numTris * 3; // wind doesn't need to be updated, an even number of triangles have been drawn. #elif PPSSPP_ARCH(ARM_NEON) - int numChunks = (numTris + 7) >> 3; uint16x8_t ibase8 = vdupq_n_u16(indexOffset); const u16 *offsets = clockwise ? offsets_clockwise : offsets_counter_clockwise; u16 *dst = inds_; @@ -167,6 +166,7 @@ void IndexGenerator::AddStrip(int numVerts, int indexOffset, bool clockwise) { uint16x8_t offsets2 = vaddq_u16(ibase8, vld1q_u16(offsets + 16)); vst1q_u16(dst + 16, offsets2); uint16x8_t increment = vdupq_n_u16(8); + int numChunks = (numTris + 7) >> 3; for (int i = 1; i < numChunks; i++) { dst += 3 * 8; offsets0 = vaddq_u16(offsets0, increment);