Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Avoid __builtin_ctzll on some platforms #604

Merged
merged 1 commit into from
Aug 3, 2023
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
40 changes: 32 additions & 8 deletions cbits/is-valid-utf8.c
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@ SUCH DAMAGE.
#endif

#include <MachDeps.h>
#include "Rts.h"
Copy link

@juhp juhp Aug 8, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This seems to break x86_64 on Fedora for me... booting 9.4.6 with either 9.2.8 or 9.4.5:

In file included from /usr/lib64/ghc-9.4.5/lib/x86_64-linux-ghc-9.4.5/rts-1.0.2/include/Rts.h:235,
                 from libraries/bytestring/cbits/is-valid-utf8.c:53:0: error:
    
/usr/lib64/ghc-9.4.5/lib/x86_64-linux-ghc-9.4.5/rts-1.0.2/include/rts/OSThreads.h:38:5: error:
     error: unknown type name ‘clockid_t’
       38 |     clockid_t timeout_clk;
          |     ^~~~~~~~~
   |
38 |     clockid_t timeout_clk;
   |     ^

Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Looks to me like that #include should be conditional somehow.


#ifdef WORDS_BIGENDIAN
#define to_little_endian(x) __builtin_bswap64(x)
Expand All @@ -66,6 +67,29 @@ static inline uint64_t read_uint64(const uint64_t *p) {
return r;
}

// stand-in for __builtin_ctzll, used because __builtin_ctzll can
// cause runtime linker issues for GHC in some exotic situations (#601)
//
// See also these ghc issues:
// * https://gitlab.haskell.org/ghc/ghc/-/issues/21787
// * https://gitlab.haskell.org/ghc/ghc/-/issues/22011
static inline int hs_bytestring_ctz64(const uint64_t x) {
// These CPP conditions are taken from ghc-prim:
// https://gitlab.haskell.org/ghc/ghc/-/blob/73b5c7ce33929e1f7c9283ed7c2860aa40f6d0ec/libraries/ghc-prim/cbits/ctz.c#L31-57
// credit to Herbert Valerio Riedel, Erik de Castro Lopo
#if defined(__GNUC__) && (defined(i386_HOST_ARCH) || defined(powerpc_HOST_ARCH))
uint32_t xhi = (uint32_t)(x >> 32);
uint32_t xlo = (uint32_t) x;
return xlo ? __builtin_ctz(xlo) : 32 + __builtin_ctz(xhi);
#elif SIZEOF_UNSIGNED_LONG == 8
return __builtin_ctzl(x);
#elif SIZEOF_UNSIGNED_LONG_LONG == 8
return __builtin_ctzll(x);
#else
# error no suitable __builtin_ctz() found
#endif
}

static inline int is_valid_utf8_fallback(uint8_t const *const src,
size_t const len) {
uint8_t const *ptr = (uint8_t const *)src;
Expand Down Expand Up @@ -100,16 +124,16 @@ static inline int is_valid_utf8_fallback(uint8_t const *const src,
if (results[3] == 0) {
ptr += 8;
} else {
ptr += (__builtin_ctzll(results[3]) / 8);
ptr += (hs_bytestring_ctz64(results[3]) / 8);
}
} else {
ptr += (__builtin_ctzll(results[2]) / 8);
ptr += (hs_bytestring_ctz64(results[2]) / 8);
}
} else {
ptr += (__builtin_ctzll(results[1]) / 8);
ptr += (hs_bytestring_ctz64(results[1]) / 8);
}
} else {
ptr += (__builtin_ctzll(results[0]) / 8);
ptr += (hs_bytestring_ctz64(results[0]) / 8);
}
}
}
Expand Down Expand Up @@ -207,16 +231,16 @@ static inline int is_valid_utf8_sse2(uint8_t const *const src,
if (result == 0) {
ptr += 16;
} else {
ptr += __builtin_ctzll(result);
ptr += __builtin_ctz(result);
}
} else {
ptr += __builtin_ctzll(result);
ptr += __builtin_ctz(result);
}
} else {
ptr += __builtin_ctzll(result);
ptr += __builtin_ctz(result);
}
} else {
ptr += __builtin_ctzll(result);
ptr += __builtin_ctz(result);
}
}
}
Expand Down
Loading