Skip to content

Commit

Permalink
Merge pull request #18394 from hrydgard/driver76-fix-new-crash
Browse files Browse the repository at this point in the history
Ignore triangle strips with fewer than 3 vertices.
  • Loading branch information
hrydgard committed Nov 2, 2023
2 parents a4d667c + 48a1348 commit 157fabf
Showing 1 changed file with 5 additions and 2 deletions.
7 changes: 5 additions & 2 deletions GPU/Common/IndexGenerator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -115,6 +115,9 @@ alignas(16) static const uint16_t offsets_counter_clockwise[24] = {

void IndexGenerator::AddStrip(int numVerts, int indexOffset, bool clockwise) {
int numTris = numVerts - 2;
if (numTris <= 0) {
return;
}
#ifdef _M_SSE
// In an SSE2 register we can fit 8 16-bit integers.
// However, we need to output a multiple of 3 indices.
Expand All @@ -123,7 +126,6 @@ void IndexGenerator::AddStrip(int numVerts, int indexOffset, bool clockwise) {

// We allow ourselves to write some extra indices to avoid the fallback loop.
// That's alright as we're appending to a buffer - they will get overwritten anyway.
int numChunks = (numTris + 7) >> 3;
__m128i ibase8 = _mm_set1_epi16(indexOffset);
const __m128i *offsets = (const __m128i *)(clockwise ? offsets_clockwise : offsets_counter_clockwise);
__m128i *dst = (__m128i *)inds_;
Expand All @@ -137,6 +139,7 @@ void IndexGenerator::AddStrip(int numVerts, int indexOffset, bool clockwise) {
__m128i offsets2 = _mm_add_epi16(ibase8, _mm_load_si128(offsets + 2));
_mm_storeu_si128(dst + 2, offsets2);
__m128i increment = _mm_set1_epi16(8);
int numChunks = (numTris + 7) >> 3;
for (int i = 1; i < numChunks; i++) {
dst += 3;
offsets0 = _mm_add_epi16(offsets0, increment);
Expand All @@ -151,7 +154,6 @@ void IndexGenerator::AddStrip(int numVerts, int indexOffset, bool clockwise) {
inds_ += numTris * 3;
// wind doesn't need to be updated, an even number of triangles have been drawn.
#elif PPSSPP_ARCH(ARM_NEON)
int numChunks = (numTris + 7) >> 3;
uint16x8_t ibase8 = vdupq_n_u16(indexOffset);
const u16 *offsets = clockwise ? offsets_clockwise : offsets_counter_clockwise;
u16 *dst = inds_;
Expand All @@ -164,6 +166,7 @@ void IndexGenerator::AddStrip(int numVerts, int indexOffset, bool clockwise) {
uint16x8_t offsets2 = vaddq_u16(ibase8, vld1q_u16(offsets + 16));
vst1q_u16(dst + 16, offsets2);
uint16x8_t increment = vdupq_n_u16(8);
int numChunks = (numTris + 7) >> 3;
for (int i = 1; i < numChunks; i++) {
dst += 3 * 8;
offsets0 = vaddq_u16(offsets0, increment);
Expand Down

0 comments on commit 157fabf

Please sign in to comment.