Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
Browse files

Optimize latin1-to-UTF16 C-implementation by using 32-bit loads

  • Loading branch information...
commit ea0e522575a1e4367df7bad8c6370cf332e95740 1 parent 3cec9a2
@hvr hvr authored
Showing with 24 additions and 0 deletions.
  1. +24 −0 cbits/cbits.c
View
24 cbits/cbits.c
@@ -71,6 +71,30 @@ _hs_text_decode_latin1(uint16_t *dest, const uint8_t const *src,
const uint8_t const *srcend)
{
const uint8_t *p = src;
+
+#if defined(__i386__) || defined(__x86_64__)
+ /* This optimization works on little-endian systems by using
+ (aligned) 32-bit loads instead of 8-bit loads.
+ */
+
+ /* consume unaligned prefix */
+ while (p != srcend && (uintptr_t)p & 0x3)
+ *dest++ = *p++;
+
+ /* iterate over 32-bit aligned loads */
+ while (p < srcend - 3) {
+ const uint32_t w = *((const uint32_t *)p);
+
+ *dest++ = w & 0xff;
+ *dest++ = (w >> 8) & 0xff;
+ *dest++ = (w >> 16) & 0xff;
+ *dest++ = (w >> 24) & 0xff;
+
+ p += 4;
+ }
+#endif
+
+ /* handle the remaining trailing bytes (the entire input when the 32-bit fast path above is compiled out) */
while (p != srcend)
*dest++ = *p++;
}
Please sign in to comment.
Something went wrong with that request. Please try again.