diff --git a/GPU/Common/IndexGenerator.cpp b/GPU/Common/IndexGenerator.cpp index 436ddd6b5a77..7eb4c800794a 100644 --- a/GPU/Common/IndexGenerator.cpp +++ b/GPU/Common/IndexGenerator.cpp @@ -163,19 +163,23 @@ void IndexGenerator::AddStrip(int numVerts, bool clockwise) { u16 *dst = inds_; uint16x8_t offsets0 = vaddq_u16(ibase8, vld1q_u16(offsets)); vst1q_u16(dst, offsets0); - uint16x8_t offsets1 = vaddq_u16(ibase8, vld1q_u16(offsets + 8)); - vst1q_u16(dst + 8, offsets1); - uint16x8_t offsets2 = vaddq_u16(ibase8, vld1q_u16(offsets + 16)); - vst1q_u16(dst + 16, offsets2); - uint16x8_t increment = vdupq_n_u16(8); - for (int i = 1; i < numChunks; i++) { - dst += 3 * 8; - offsets0 = vaddq_u16(offsets0, increment); - offsets1 = vaddq_u16(offsets1, increment); - offsets2 = vaddq_u16(offsets2, increment); - vst1q_u16(dst, offsets0); + if (numTris > 2) { + uint16x8_t offsets1 = vaddq_u16(ibase8, vld1q_u16(offsets + 8)); vst1q_u16(dst + 8, offsets1); - vst1q_u16(dst + 16, offsets2); + if (numTris > 5) { + uint16x8_t offsets2 = vaddq_u16(ibase8, vld1q_u16(offsets + 16)); + vst1q_u16(dst + 16, offsets2); + uint16x8_t increment = vdupq_n_u16(8); + for (int i = 1; i < numChunks; i++) { + dst += 3 * 8; + offsets0 = vaddq_u16(offsets0, increment); + offsets1 = vaddq_u16(offsets1, increment); + offsets2 = vaddq_u16(offsets2, increment); + vst1q_u16(dst, offsets0); + vst1q_u16(dst + 8, offsets1); + vst1q_u16(dst + 16, offsets2); + } + } } inds_ += numTris * 3; #else