Skip to content

Commit

Permalink
Implement intersperse using SSE2
Browse files Browse the repository at this point in the history
  • Loading branch information
ethercrow committed Nov 4, 2020
1 parent a83b778 commit 1b4e1fa
Show file tree
Hide file tree
Showing 2 changed files with 22 additions and 1 deletion.
3 changes: 3 additions & 0 deletions bench/BenchAll.hs
Original file line number Diff line number Diff line change
Expand Up @@ -361,6 +361,9 @@ main = do
, benchFE "floatHexFixed" $ fromIntegral >$< P.floatHexFixed
, benchFE "doubleHexFixed" $ fromIntegral >$< P.doubleHexFixed
]
, bgroup "intersperse"
[ bench "intersperse" $ whnf (S.intersperse 32) byteStringData
]
, bgroup "partition"
[
bgroup "strict"
Expand Down
20 changes: 19 additions & 1 deletion cbits/fpstring.c
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,10 @@
*/

#include "fpstring.h"
#if defined(__x86_64__)
#include <emmintrin.h>
#include <xmmintrin.h>
#endif

/* copy a string in reverse */
void fps_reverse(unsigned char *q, unsigned char *p, size_t n) {
Expand All @@ -44,7 +48,21 @@ void fps_intersperse(unsigned char *q,
unsigned char *p,
size_t n,
unsigned char c) {

#if defined(__x86_64__)
/*
 * SSE2 fast path: each iteration reads 8 source bytes from p and writes
 * 16 bytes to q, namely every source byte followed by the separator c.
 * Afterwards n is reduced by the number of source bytes consumed so that
 * the scalar loop following this block only handles the remainder.
 */
{
/* Every one of the 16 byte lanes holds the separator. */
const __m128i separator = _mm_set1_epi8(c);
const unsigned char *const p_begin = p;
/*
 * The loop below runs only while more than 9 source bytes remain, so the
 * 8-byte load stays inside the input and at least two source bytes are
 * left unprocessed after every iteration (the separator written after the
 * 8th byte is therefore always followed by more data).
 * NOTE(review): for n < 9 this expression forms a pointer before p_begin,
 * which C does not define; the loop is expected to be skipped in that
 * case, but an explicit `n > 9` guard would be safer -- confirm.
 */
const unsigned char *const p_end = p_begin + n - 9;
while (p < p_end) {
/* Load 8 bytes into the low half of the register; the high half is zeroed. */
const __m128i eight_src_bytes = _mm_loadl_epi64((__m128i *)p);
/* Interleave the low 8 bytes of both operands: s0 c s1 c ... s7 c. */
const __m128i sixteen_dst_bytes = _mm_unpacklo_epi8(eight_src_bytes, separator);
/* Unaligned 16-byte store; presumably q has room for it -- the output
 * buffer size is not visible in this block, verify against the caller. */
_mm_storeu_si128((__m128i *)q, sixteen_dst_bytes);
p += 8;
q += 16;
}
/* Drop the bytes already handled by the vector loop from the count. */
n -= p - p_begin;
}
#endif
while (n > 1) {
*q++ = *p++;
*q++ = c;
Expand Down

0 comments on commit 1b4e1fa

Please sign in to comment.