Skip to content

Commit

Permalink
Then add the early-outs for NEON too.
Browse files: browse the repository at this point in the history
  • Loading branch information
hrydgard committed Jun 13, 2023
1 parent 9647872 commit 0eb3702
Showing 1 changed file with 16 additions and 12 deletions.
28 changes: 16 additions & 12 deletions GPU/Common/IndexGenerator.cpp
Expand Up @@ -163,19 +163,23 @@ void IndexGenerator::AddStrip(int numVerts, bool clockwise) {
u16 *dst = inds_;
uint16x8_t offsets0 = vaddq_u16(ibase8, vld1q_u16(offsets));
vst1q_u16(dst, offsets0);
uint16x8_t offsets1 = vaddq_u16(ibase8, vld1q_u16(offsets + 8));
vst1q_u16(dst + 8, offsets1);
uint16x8_t offsets2 = vaddq_u16(ibase8, vld1q_u16(offsets + 16));
vst1q_u16(dst + 16, offsets2);
uint16x8_t increment = vdupq_n_u16(8);
for (int i = 1; i < numChunks; i++) {
dst += 3 * 8;
offsets0 = vaddq_u16(offsets0, increment);
offsets1 = vaddq_u16(offsets1, increment);
offsets2 = vaddq_u16(offsets2, increment);
vst1q_u16(dst, offsets0);
if (numTris > 2) {
uint16x8_t offsets1 = vaddq_u16(ibase8, vld1q_u16(offsets + 8));
vst1q_u16(dst + 8, offsets1);
vst1q_u16(dst + 16, offsets2);
if (numTris > 5) {
uint16x8_t offsets2 = vaddq_u16(ibase8, vld1q_u16(offsets + 16));
vst1q_u16(dst + 16, offsets2);
uint16x8_t increment = vdupq_n_u16(8);
for (int i = 1; i < numChunks; i++) {
dst += 3 * 8;
offsets0 = vaddq_u16(offsets0, increment);
offsets1 = vaddq_u16(offsets1, increment);
offsets2 = vaddq_u16(offsets2, increment);
vst1q_u16(dst, offsets0);
vst1q_u16(dst + 8, offsets1);
vst1q_u16(dst + 16, offsets2);
}
}
}
inds_ += numTris * 3;
#else
Expand Down

0 comments on commit 0eb3702

Please sign in to comment.