Permalink
Cannot retrieve contributors at this time
602 lines (533 sloc)
18.8 KB
| #ifndef BITOPS_HH | |
| #define BITOPS_HH | |
| #if defined(__cpp_constexpr) && __cpp_constexpr >= 201304 | |
| #define constexpr14 constexpr | |
| #else | |
| #define constexpr14 | |
| #endif | |
| #include <cstdint> | |
| #include <cstddef> | |
| #include <climits> | |
| #include <limits> | |
| #include <type_traits> | |
| #include <algorithm> | |
| namespace std { | |
| //This implementation makes the following platform assumptions: | |
| //* signed right shift is an arithmetic shift | |
| //* CHAR_BIT == 8 | |
| //* Native integer types are exactly 8, 16, 32, and 64 bits wide. No support is added for larger types. | |
| //* Signed numbers are implemented using 2's compliment | |
| // | |
| //The implementation is not designed to be efficient. The purpose is only to prove that each proposed function is implementable. | |
| //A real implementation may use techniques such as SFINAE, static_assert, overloading, and/or = delete to limit the set of overloads. | |
| //These have been omitted here to improve readability. | |
| //////////////////////////////////// | |
| //Explicit shifts | |
| //////////////////////////////////// | |
| //Logical left shift, undefined if s < 0 or x > sizeof(x) * CHAR_BIT | |
| //Just about every processor in existance has this | |
| //Included for symmetry | |
| template <typename Integral> | |
| constexpr Integral shll(Integral x, int s) noexcept { | |
| return Integral(typename std::make_unsigned<Integral>::type(x) << s); | |
| } | |
| //Logical right shift, undefined if s < 0 or x > sizeof(x) * CHAR_BIT | |
| //Just about every processor in existance has this | |
| //Included for symmetry, also can right shift a signed number easily without a cast to unsigned | |
| template <typename Integral> | |
| constexpr Integral shlr(Integral x, int s) noexcept { | |
| return Integral(typename std::make_unsigned<Integral>::type(x) >> s); | |
| } | |
| //Arithmetic left shift, undefined if s < 0 or x > sizeof(x) * CHAR_BIT | |
| //Just about every processor in existance has this | |
| //Included for symmetry | |
| template <typename Integral> | |
| constexpr Integral shal(Integral x, int s) noexcept { | |
| return shll(x, s); | |
| } | |
| //Arithmetic right shift, undefined if s < 0 or x > sizeof(x) * CHAR_BIT | |
| //Just about every processor in existance has this, signed right shift is implementation defined. There is no standards compliant alternative to shar(). | |
| template <typename Integral> | |
| constexpr Integral shar(Integral x, int s) noexcept { | |
| //Assumes signed right shift is arithmetic. If it is not the platform will need to implement this another way. | |
| return Integral(typename std::make_signed<Integral>::type(x) >> s); | |
| } | |
| //Circular left shift (rotate), undefined if s < 0 or x > sizeof(x) * CHAR_BIT | |
| //Just about every processor in existance has this, including the PDP-11 (1969) and yet C or C++ never included a way to get at this instruction. | |
| template <typename Integral> | |
| constexpr Integral rotl(Integral x, int s) noexcept { | |
| return (x << s) | shlr(x, (sizeof(x)*CHAR_BIT-s)); | |
| } | |
| //Circular right shift (rotate), undefined if s < 0 or x > sizeof(x) * CHAR_BIT | |
| //Just about every processor in existance has this, including the PDP-11 (1969) and yet C or C++ never included a way to get at this instruction. | |
| template <typename Integral> | |
| constexpr Integral rotr(Integral x, int s) noexcept { | |
| return shlr(x, s) | ( x << (sizeof(x)*CHAR_BIT-s)); | |
| } | |
| //////////////////////////////////// | |
| //Zero and One Counting algorithms | |
| //////////////////////////////////// | |
| //Returns the number of trailing zeros in x, or sizeof(x) * CHAR_BIT if x is 0 | |
| //i386 bsf, cmov | |
| //x86_64 w/ BMI1: tzcnt | |
| //Alpha: cttz | |
| //MIPS: CLZ | |
| //gcc: x == 0 ? sizeof(x) * CHAR_BIT :__builtin_ctz(x) | |
| //Applications: SSE2 strlen, Howard Hinnant's gcd example | |
| template <typename Integral> | |
| constexpr14 int cntt0(Integral x) noexcept { | |
| constexpr int nbits = int(sizeof(x) * CHAR_BIT); | |
| if(x == 0) { return nbits; } | |
| Integral n = 0; | |
| if(sizeof(x) > 1) { | |
| if(sizeof(x) > 2) { | |
| if(sizeof(x) > 4) { | |
| if((x & Integral(0xFFFFFFFFUL)) == 0) { n = n + 32; x = shlr(x, 32); } | |
| } | |
| if((x & Integral(0xFFFFUL)) == 0) { n = n + 16; x = shlr(x, 16); } | |
| } | |
| if((x & Integral(0xFFUL)) == 0) { n = n + 8; x = shlr(x, 8); } | |
| } | |
| if((x & Integral(0xFUL)) == 0) { n = n + 4; x = shlr(x, 4); } | |
| if((x & Integral(0x3UL)) == 0) { n = n + 2; x = shlr(x, 2); } | |
| return n - (x & 1); | |
| } | |
| //Returns the number of leading zeroes in x, or sizeof(x) * CHAR_BIT if x is 0 | |
| //i386 bsr, cmov | |
| //x86_64 w/ SSE4: lzcnt | |
| //ARMv5: CLZ | |
| //IA64: clz | |
| //Alpha: CTLZ | |
| //PowerPC: cntlz[dw] | |
| //gcc: x == 0 ? sizeof(x) * CHAR_BIT :__builtin_clz(x) | |
| template <typename Integral> | |
| constexpr14 int cntl0(Integral x) noexcept { | |
| constexpr int nbits = int(sizeof(x) * CHAR_BIT); | |
| if(x == 0) { return nbits; } | |
| Integral n = 1; | |
| if(sizeof(x) > 1) { | |
| if(sizeof(x) > 2) { | |
| if(sizeof(x) > 4) { | |
| if((shlr(x, nbits-32)) == 0) { n = n + 32; x = x << 32; } | |
| } | |
| if((shlr(x, nbits-16)) == 0) { n = n + 16; x = x << 16; } | |
| } | |
| if((shlr(x, nbits-8)) == 0) { n = n + 8; x = x << 8; } | |
| } | |
| if((shlr(x, nbits-4)) == 0) { n = n + 4; x = x << 4; } | |
| if((shlr(x, nbits-2)) == 0) { n = n + 2; x = x << 2; } | |
| n = n - (shlr(x, 31)); | |
| return n; | |
| } | |
| //Returns the number of leading 1 bits in x. | |
| //ARMv8: CLS | |
| //Blackfin: SIGNBITS | |
| //C6X: NORM | |
| //Picochip: SBC | |
| //MIPS: CTO | |
| //gcc: __builtin_clrsb(x) | |
| template <typename Integral> | |
| constexpr14 int cntl1(Integral x) noexcept { | |
| return cntl0(~x); | |
| } | |
| //Returns the number of trailing 1 bits in x. | |
| template <typename Integral> | |
| constexpr14 int cntt1(Integral x) noexcept { | |
| return cntt0(~x); | |
| } | |
| //Returns the number of 1 bits in x. | |
| //x86_64 SSE4: popcnt | |
| //IA64: popcnt | |
| //Alpha: CTPOP | |
| //PowerPC: popcntb | |
| //SparcV9: POPC | |
| //Blackfin: ONES | |
| //gcc: __builtin_popcount(x) | |
| template <typename Integral> | |
| constexpr14 int popcount(Integral x) noexcept { | |
| x = (x & Integral(0x5555555555555555UL)) + (shlr(x, 1) & Integral(0x5555555555555555UL)); | |
| x = (x & Integral(0x3333333333333333UL)) + (shlr(x, 2) & Integral(0x3333333333333333UL)); | |
| x = (x & Integral(0x0F0F0F0F0F0F0F0FUL)) + (shlr(x, 4) & Integral(0x0F0F0F0F0F0F0F0FUL)); | |
| if(sizeof(x) > 1) { | |
| x = (x & Integral(0x00FF00FF00FF00FFUL)) + (shlr(x, 8) & Integral(0x00FF00FF00FF00FFUL)); | |
| if(sizeof(x) > 2) { | |
| x = (x & Integral(0x0000FFFF0000FFFFUL)) + (shlr(x, 16) & Integral(0x0000FFFF0000FFFFUL)); | |
| if(sizeof(x) > 4) { | |
| x = (x & Integral(0x00000000FFFFFFFFUL)) + (shlr(x, 32) & Integral(0x00000000FFFFFFFFUL)); | |
| } | |
| } | |
| } | |
| return x; | |
| } | |
| //Returns the number of 1 bits in x mod 2 | |
| //gcc: __builtin_parity(x) | |
| template <typename Integral> | |
| constexpr14 int parity(Integral x) noexcept { | |
| x = x ^ shlr(x, 1); | |
| x = x ^ shlr(x, 2); | |
| x = x ^ shlr(x, 4); | |
| if(sizeof(x) > 1) { | |
| x = x ^ shlr(x, 8); | |
| if(sizeof(x) > 2) { | |
| x = x ^ shlr(x, 16); | |
| if(sizeof(x) > 4) { | |
| x = x ^ shlr(x, 32); | |
| } | |
| } | |
| } | |
| return x; | |
| } | |
| //////////////////////////////////// | |
| //Rightmost bit manipulation | |
| //////////////////////////////////// | |
| //Reset least siginificant 1 bit | |
| //Resets the least siginificant 1 bit of x. Returns 0 if x is 0. | |
| //x86_64 BMI1: BLSR | |
| template <typename Integral> | |
| constexpr Integral rstls1b(Integral x) { | |
| return x & (x-1); | |
| } | |
| //Set the least significant 0 bit | |
| //x86_64 AMD TBM: BLCS | |
| template <typename Integral> | |
| constexpr Integral setls0b(Integral x) { | |
| return x | (x + 1); | |
| } | |
| //Isolate least siginificant 1 bit | |
| //Isolates the least significant 1 bit of x and returns it. Returns 0 if x is 0. | |
| //x86_64 BMI1: BLSI | |
| //x86_64 AMD TBM: BLSIC, NOT | |
| template <typename Integral> | |
| constexpr Integral isols1b(Integral x) { | |
| return x & -x; | |
| } | |
| //Set the least significant zero bit to 1 and all of the rest to 0. | |
| template <typename Integral> | |
| constexpr Integral isols0b(Integral x) { | |
| return (~x) & (x + 1); | |
| } | |
| //Reset the trailing 1's in x | |
| //x86_64 AMD TBM: BLCFILL | |
| template <typename Integral> | |
| constexpr Integral rstt1(Integral x) { | |
| return x & (x + 1); | |
| } | |
| //Set all of the trailing 0's in x | |
| //x86_64 AMD TBM: BLSFILL | |
| template <typename Integral> | |
| constexpr Integral sett0(Integral x) { | |
| return x | (x - 1); | |
| } | |
| //Returns a mask with all of the trailing 0's set. | |
| template <typename Integral> | |
| constexpr Integral maskt0(Integral x) { | |
| return (~x) & (x-1); | |
| } | |
| //Returns a mask with all of the trailing 1's set. | |
| template <typename Integral> | |
| constexpr Integral maskt1(Integral x) { | |
| return ~((~x) | (x + 1)); | |
| } | |
| //Returns a mask with all of the trailing 0's and the least significant 1 bit set. | |
| //x86_64 BMI1: BLSMSK | |
| //x86_64 AMD TBM: TZMSK | |
| template <typename Integral> | |
| constexpr Integral maskt0ls1b(Integral x) { | |
| return (x-1) ^ x; | |
| } | |
| //Returns a mask with all of the trailing 1's and the least significant 0 bit set. | |
| template <typename Integral> | |
| constexpr Integral maskt1ls0b(Integral x) { | |
| return x ^ (x + 1); | |
| } | |
| //////////////////////////////////// | |
| //Bit and Byte reversal algorithms | |
| //////////////////////////////////// | |
| //Reverse each group of blocks of bits in x. | |
| // | |
| //bits_per_block == 1: reverses the bits of x | |
| //ARMv7: RBIT | |
| //EPIPHANY: BITR | |
| //bits_per_block == 2: reverses each pair of bits in x | |
| //bits_per_block == 4: reverses the nibbles in x | |
| //AVR: SWAP | |
| //bits_per_block == 8: reverses the bytes in x (assuming CHAR_BIT == 8). This is the traditional byte swap. | |
| //i386: bswap | |
| //ARMv5: REV | |
| //PDP-11: SWAB | |
| //gcc: __builtin_bswap[16|32|64](x) | |
| //(blocks_per_group == 2) ARMv6: REV16 | |
| //(blocks_per_group == 4) ARMv8: REV32 | |
| //bits_per_block == 16,32,etc.. reverses the words in x. | |
| //(bits_per_block == 16) MC68020: SWAP | |
| template <typename Integral> | |
| constexpr14 auto reverse_bits(Integral x, | |
| int subword_bits = 1, | |
| int group_subwords = 1) | |
| noexcept -> typename std::enable_if<std::is_unsigned<Integral>::value, Integral>::type { | |
| int group_sz = int(sizeof(Integral) * CHAR_BIT) / group_subwords; | |
| int k = group_sz - subword_bits; | |
| if(k & 1) x = shll(x & Integral(0x5555555555555555UL), 1) | shlr(x & Integral(0xAAAAAAAAAAAAAAAAUL), 1); | |
| if(k & 2) x = shll(x & Integral(0x3333333333333333UL), 2) | shlr(x & Integral(0xCCCCCCCCCCCCCCCCUL), 2); | |
| if(k & 4) x = shll(x & Integral(0x0F0F0F0F0F0F0F0FUL), 4) | shlr(x & Integral(0xF0F0F0F0F0F0F0F0UL), 4); | |
| //sizeof comparisons added to help compiler remove these checks for small integers | |
| if(sizeof(x) > 1 && k & 8) x = shll(x & Integral(0x00FF00FF00FF00FFUL), 8) | shlr(x & Integral(0xFF00FF00FF00FF00UL), 8); | |
| if(sizeof(x) > 2 && k & 16) x = shll(x & Integral(0x0000FFFF0000FFFFUL), 16) | shlr(x & Integral(0xFFFF0000FFFF0000UL), 16); | |
| if(sizeof(x) > 4 && k & 32) x = shll(x & Integral(0x00000000FFFFFFFFUL), 32) | shlr(x & Integral(0xFFFFFFFF00000000UL), 32); | |
| return x; | |
| } | |
| //Signed version calls unsigned to avoid sign extension issues | |
| template <typename Integral> | |
| constexpr14 auto reverse_bits(Integral x, | |
| int subword_bits = 1, | |
| int group_subwords = 1) | |
| noexcept -> typename std::enable_if<std::is_signed<Integral>::value, Integral>::type { | |
| return Integral(reverse_bits(typename std::make_unsigned<Integral>::type(x), subword_bits, group_subwords)); | |
| } | |
| //Byte reversal, simple wrapper around reverse_bits | |
| template <typename Integral> | |
| constexpr14 Integral reverse_bytes(Integral x, | |
| int bytes_per_block=1, | |
| int blocks_per_group = sizeof(Integral)) noexcept { | |
| return reverse_bits(x, CHAR_BIT * bytes_per_block, blocks_per_group); | |
| } | |
| //////////////////////////////////// | |
| //Single bit manipulation | |
| //////////////////////////////////// | |
| //Sets bit b in x, undefined behavior if b < 0 or b >= sizeof(x) * CHAR_BIT | |
| template <typename Integral> | |
| constexpr Integral setbit(Integral x, int b) noexcept { | |
| return x | (Integral(1) << b); | |
| } | |
| //Resets bit b in x, undefined behavior if b < 0 or b >= sizeof(x) * CHAR_BIT | |
| template <typename Integral> | |
| constexpr Integral rstbit(Integral x, int b) noexcept { | |
| return x & ~(Integral(1) << b); | |
| } | |
| //Flips bit b in x, undefined behavior if b < 0 or b >= sizeof(x) * CHAR_BIT | |
| template <typename Integral> | |
| constexpr Integral flipbit(Integral x, int b) noexcept { | |
| return x ^ (Integral(1) << b); | |
| } | |
| //Return whether or not bit b is set in x, undefined behavior if b < 0 or b >= sizeof(x) * CHAR_BIT | |
| template <typename Integral> | |
| constexpr bool testbit(Integral x, int b) noexcept { | |
| return x & (Integral(1) << b); | |
| } | |
| //////////////////////////////////// | |
| //Range of bits manipulation | |
| //////////////////////////////////// | |
| //Resets all bits >= position b, nop if b > sizeof(x) * CHAR_BIT | |
| //x86_64 w/ BMI2: BZHI | |
| template <typename Integral> | |
| constexpr Integral rstbitsge(Integral x, int b) noexcept { | |
| return x & ((Integral(1) << b)-1); | |
| } | |
| //Resets all bits < position b, nop if b > sizeof(x) * CHAR_BIT | |
| template <typename Integral> | |
| constexpr Integral rstbitsle(Integral x, int b) noexcept { | |
| return x & ~((Integral(1) << (b+1))-1); | |
| } | |
| //Set all bits >= position b, nop if b > sizeof(x) * CHAR_BIT | |
| template <typename Integral> | |
| constexpr Integral setbitsge(Integral x, int b) noexcept { | |
| return x | ~((Integral(1) << b)-1); | |
| } | |
| //Sets all bits < position b, nop if b > sizeof(x) * CHAR_BIT | |
| template <typename Integral> | |
| constexpr Integral setbitsle(Integral x, int b) noexcept { | |
| return x | ((Integral(1) << (b+1))-1); | |
| } | |
| //Flip all bits >= position b, nop if b > sizeof(x) * CHAR_BIT | |
| template <typename Integral> | |
| constexpr Integral flipbitsge(Integral x, int b) noexcept { | |
| return x ^ ~((Integral(1) << b)-1); | |
| } | |
| //Flip all bits < position b, nop if b > sizeof(x) * CHAR_BIT | |
| template <typename Integral> | |
| constexpr Integral flipbitsle(Integral x, int b) noexcept { | |
| return x ^ ((Integral(1) << (b+1))-1); | |
| } | |
| //////////////////////////////////// | |
| //Power of 2 manipulation | |
| //////////////////////////////////// | |
| //Returns true if x is a power of 2 | |
| //Application: Integral template arguments, add optimizations for power of 2 | |
| template <typename Integral> | |
| constexpr bool ispow2(Integral x) noexcept { | |
| return x > 0 && (x & (x-1)) == 0; | |
| //return popcount(x) == 1; | |
| } | |
| //Round up to the next power of 2 | |
| //Application: Growable containers whose size must be a power of 2 | |
| //Application: Extending a 2d image size to a power of 2 for 3d graphics libraries (OpenGL/DirectX) | |
| template <typename Integral> | |
| constexpr14 Integral ceilp2(Integral x) noexcept { | |
| x = x-1; | |
| x |= shlr(x, 1); | |
| x |= shlr(x, 2); | |
| x |= shlr(x, 4); | |
| if(sizeof(x) > 1) { | |
| x |= shlr(x, 8); | |
| if(sizeof(x) > 2) { | |
| x |= shlr(x, 16); | |
| if(sizeof(x) > 4) { | |
| x |= shlr(x, 32); | |
| } | |
| } | |
| } | |
| return x + 1; | |
| } | |
| //Round down to the previous power of 2 | |
| //Application: See ceilp2 | |
| template <typename Integral> | |
| constexpr14 Integral floorp2(Integral x) noexcept { | |
| x |= shlr(x, 1); | |
| x |= shlr(x, 2); | |
| x |= shlr(x, 4); | |
| if(sizeof(x) > 1) { | |
| x |= shlr(x, 8); | |
| if(sizeof(x) > 2) { | |
| x |= shlr(x, 16); | |
| if(sizeof(x) > 4) { | |
| x |= shlr(x, 32); | |
| } | |
| } | |
| } | |
| return x - shlr(x, 1); | |
| } | |
| //////////////////////////////////// | |
| //Saturated Arithmetic | |
| //////////////////////////////////// | |
| //Perform saturated addition on l and r. | |
| //ARMv7 DSP extensions: QADD | |
| template <typename IntegralL, typename IntegralR> | |
| constexpr auto satadd(IntegralL l, IntegralR r) noexcept -> decltype(l + r) { | |
| typedef decltype(l + r) LR; | |
| return LR(l) > std::numeric_limits<LR>::max() - LR(r) ? std::numeric_limits<LR>::max() : l + r; | |
| } | |
| //Perform saturated subtraction on l and r. | |
| //ARMv7 DSP extensions: QSUB | |
| template <typename IntegralL, typename IntegralR> | |
| constexpr auto satsub(IntegralL l, IntegralR r) noexcept -> decltype(l - r) { | |
| typedef decltype(l + r) LR; | |
| return LR(l) < std::numeric_limits<LR>::min() + LR(r) ? std::numeric_limits<LR>::min() : l - r; | |
| } | |
| //////////////////////////////////// | |
| //Pointer and size alignment helpers | |
| //////////////////////////////////// | |
| //Returns true if t is aligned to a | |
| template <typename Integral> | |
| constexpr bool is_aligned(Integral t, size_t a) noexcept { | |
| return ((t & (a-1)) == 0); | |
| } | |
| bool is_aligned(void* t, size_t a) noexcept { | |
| return is_aligned(uintptr_t(t), a); | |
| } | |
| //Returns the smallest number n when n >= val && is_aligned(n, align). align must be a power of 2! | |
| template <typename Integral> | |
| constexpr Integral align_up(Integral val, size_t a) noexcept { | |
| return ((val + (a -1)) & -a); | |
| } | |
| void* align_up(void* val, size_t a) noexcept { | |
| return (void*)align_up(uintptr_t(val), a); | |
| } | |
| //Returns the largest number n when n <= val && is_aligned(n, align). align must be a power of 2! | |
| template <typename Integral> | |
| constexpr Integral align_down(Integral val, size_t a) noexcept { | |
| return val & -a; | |
| } | |
| void* align_down(void* val, size_t a) noexcept { | |
| return (void*)align_down(uintptr_t(val), a); | |
| } | |
| /////////////////////////////////// | |
| //Bit Shuffling | |
| /////////////////////////////////// | |
| //Outer Perfect Shuffle | |
| template <typename Integral> | |
| constexpr14 Integral outer_pshuffle(Integral x) noexcept { | |
| Integral t = 0; | |
| if(sizeof(x) > 4) { | |
| t = (x ^ shlr(x, 16)) & Integral(0x00000000FFFF0000UL); | |
| x = x ^ t ^ shll(t, 16); | |
| } | |
| if(sizeof(x) > 2) { | |
| t = (x ^ shlr(x, 8)) & Integral(0x0000FF000000FF00UL); | |
| x = x ^ t ^ shll(t, 8); | |
| } | |
| if(sizeof(x) > 1) { | |
| t = (x ^ shlr(x, 4)) & Integral(0x00F000F000F000F0UL); | |
| x = x ^ t ^ shll(t, 4); | |
| } | |
| t = (x ^ shlr(x, 2)) & Integral(0x0C0C0C0C0C0C0C0CUL); | |
| x = x ^ t ^ shll(t, 2); | |
| t = (x ^ shlr(x, 1)) & Integral(0x2222222222222222UL); | |
| x = x ^ t ^ shll(t, 1); | |
| return x; | |
| } | |
| template <typename Integral> | |
| constexpr14 Integral outer_punshuffle(Integral x) noexcept { | |
| Integral t = 0; | |
| t = (x ^ shlr(x, 1)) & Integral(0x2222222222222222UL); | |
| x = x ^ t ^ shll(t, 1); | |
| t = (x ^ shlr(x, 2)) & Integral(0x0C0C0C0C0C0C0C0CUL); | |
| x = x ^ t ^ shll(t, 2); | |
| if(sizeof(x) > 1) { | |
| t = (x ^ shlr(x, 4)) & Integral(0x00F000F000F000F0UL); | |
| x = x ^ t ^ shll(t, 4); | |
| } | |
| if(sizeof(x) > 2) { | |
| t = (x ^ shlr(x, 8)) & Integral(0x0000FF000000FF00UL); | |
| x = x ^ t ^ shll(t, 8); | |
| } | |
| if(sizeof(x) > 4) { | |
| t = (x ^ shlr(x, 16)) & Integral(0x00000000FFFF0000UL); | |
| x = x ^ t ^ shll(t, 16); | |
| } | |
| return x; | |
| } | |
| template <typename Integral> | |
| constexpr14 Integral inner_pshuffle(Integral x) noexcept { | |
| return outer_pshuffle(reverse_bits(x, sizeof(x)*CHAR_BIT/2, 2)); | |
| } | |
| template <typename Integral> | |
| constexpr14 Integral inner_punshuffle(Integral x) noexcept { | |
| return reverse_bits(outer_punshuffle(x), sizeof(x)*CHAR_BIT/2, 2); | |
| } | |
| /////////////////////////////////// | |
| //Bits Deposit and Extract | |
| /////////////////////////////////// | |
| //Parallel Bits Deposit | |
| //x HGFEDCBA | |
| //mask 01100100 | |
| //res 0CB00A00 | |
| //x86_64 BMI2: PDEP | |
| template <typename Integral> | |
| constexpr14 Integral deposit_bits(Integral x, Integral mask) { | |
| Integral res = 0; | |
| for(Integral bb = 1; mask != 0; bb += bb) { | |
| if(x & bb) { | |
| res |= mask & (-mask); | |
| } | |
| mask &= (mask - 1); | |
| } | |
| return res; | |
| } | |
| //Parallel Bits Extract | |
| //x HGFEDCBA | |
| //mask 01100100 | |
| //res 00000GFC | |
| //x86_64 BMI2: PEXT | |
| template <typename Integral> | |
| constexpr14 Integral extract_bits(Integral x, Integral mask) { | |
| Integral res = 0; | |
| for(Integral bb = 1; mask != 0; bb += bb) { | |
| if(x & mask & -mask) { | |
| res |= bb; | |
| } | |
| mask &= (mask - 1); | |
| } | |
| return res; | |
| } | |
| } //namespace std | |
| #endif |