Permalink
Browse files

Add a SIMD decoder

This decoder decodes four characters at a time using vector operations.

Performance results on an Intel i7-7500U @ 2.7 GHz (running Ubuntu, kernel 4.10.0-35-generic):

asm: 664.000079 MB/s, 0 errors
simd: 754.666757 MB/s, 0 errors
branchless: 448.000053 MB/s, 0 errors
Hoehrmann:  372.000044 MB/s, 0 errors

Performance results on an Intel i7-4770 @ 3.40 GHz (running in Windows' Linux mode):
asm: 653.333411 MB/s, 0 errors
simd: 21.333336 MB/s, 0 errors
branchless: 464.000055 MB/s, 0 errors
Hoehrmann:  386.666713 MB/s, 0 errors

As these results show, whether the SIMD decoder is a win depends on the CPU microarchitecture: it is the fastest decoder on the i7-7500U, but by far the slowest on the i7-4770.
  • Loading branch information...
bdonlan committed Oct 9, 2017
1 parent fd1571a commit 3802d3b0e10ea16810dd40f8116243971ff7603d
Showing with 617 additions and 6 deletions.
  1. +5 −4 Makefile
  2. +29 −1 test/benchmark.c
  3. +432 −0 test/decode-simd.s
  4. +9 −0 test/simd-assist.c
  5. +142 −1 test/tests.c
View
@@ -1,13 +1,14 @@
CC = cc -std=c99
CFLAGS = -Wall -Wextra -O3 -g3 -march=native
ASM = test/decode.s test/decode-simd.s test/simd-assist.c
all: benchmark tests
benchmark: test/benchmark.c utf8.h test/utf8-encode.h test/bh-utf8.h test/decode.s
$(CC) $(CFLAGS) $(LDFLAGS) -o $@ test/benchmark.c test/decode.s $(LDLIBS)
benchmark: test/benchmark.c utf8.h test/utf8-encode.h test/bh-utf8.h $(ASM)
$(CC) $(CFLAGS) $(LDFLAGS) -o $@ test/benchmark.c $(ASM) $(LDLIBS)
tests: test/tests.c utf8.h test/utf8-encode.h test/decode.s
$(CC) $(CFLAGS) -O0 $(LDFLAGS) -o $@ test/tests.c test/decode.s $(LDLIBS)
tests: test/tests.c utf8.h test/utf8-encode.h $(ASM)
$(CC) $(CFLAGS) -O0 $(LDFLAGS) -o $@ test/tests.c $(ASM) $(LDLIBS)
bench: benchmark
./benchmark
View
@@ -14,6 +14,7 @@
#define BUFLEN 8 // MB
int utf8_decode_asm(const unsigned char **restrict inbufp, size_t inbufsz, unsigned int **restrict outbufp, size_t outbufsz);
int utf8_decode_simd(const unsigned char **restrict inbufp, size_t inbufsz, unsigned int **restrict outbufp, size_t outbufsz);
static uint32_t
pcg32(uint64_t *s)
@@ -81,7 +82,9 @@ main(void)
unsigned char *buffer = malloc(z);
unsigned char *end = buffer_fill(buffer, z);
unsigned int *outbuf = malloc(z * 4);
double rate;
#if 1
/* Benchmark the asm decoder */
running = 1;
signal(SIGALRM, alarm_handler);
@@ -101,9 +104,33 @@ main(void)
n++;
} while (running);
double rate = n * (end - buffer) / (double)SECONDS / 1024 / 1024;
rate = n * (end - buffer) / (double)SECONDS / 1024 / 1024;
printf("asm: %f MB/s, %ld errors\n", rate, errors);
#endif
/* Benchmark the simd decoder */
running = 1;
signal(SIGALRM, alarm_handler);
alarm(SECONDS);
errors = n = 0;
do {
const unsigned char *p = buffer;
long count = 0;
while (p < end) {
unsigned int *outbufp = outbuf;
if (utf8_decode_simd(&p, end - p, &outbufp, z * 4) <= 0) {
errors++;
}
count++;
}
if (p == end) // reached the end successfully?
n++;
} while (running);
rate = n * (end - buffer) / (double)SECONDS / 1024 / 1024;
printf("simd: %f MB/s, %ld errors\n", rate, errors);
#if 1
/* Benchmark the branchless decoder */
running = 1;
signal(SIGALRM, alarm_handler);
@@ -148,6 +175,7 @@ main(void)
rate = n * (end - buffer) / (double)SECONDS / 1024 / 1024;
printf("Hoehrmann: %f MB/s, %ld errors\n", rate, errors);
#endif
free(buffer);
}
Oops, something went wrong.

0 comments on commit 3802d3b

Please sign in to comment.