Skip to content

Commit

Permalink
Merge pull request #195 from chfast/cleanup-umul
Browse files Browse the repository at this point in the history
Cleanup umul()
  • Loading branch information
chfast committed Mar 4, 2021
2 parents 1405fbb + 4ede30d commit 6c39eb7
Show file tree
Hide file tree
Showing 4 changed files with 32 additions and 40 deletions.
34 changes: 16 additions & 18 deletions include/intx/int128.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -368,9 +368,22 @@ inline constexpr uint128 operator>>(uint128 x, uint128 shift) noexcept
/// Multiplication
/// @{

/// Portable full unsigned multiplication 64 x 64 -> 128.
inline constexpr uint128 constexpr_umul(uint64_t x, uint64_t y) noexcept
/// Full unsigned multiplication 64 x 64 -> 128.
inline constexpr uint128 umul(uint64_t x, uint64_t y) noexcept
{
#if INTX_HAS_BUILTIN_INT128
return builtin_uint128{x} * builtin_uint128{y};
#elif defined(_MSC_VER) && _MSC_VER >= 1925
if (!is_constant_evaluated())
{
unsigned __int64 hi = 0;
const auto lo = _umul128(x, y, &hi);
return {hi, lo};
}
// In a constant-evaluated context, fall through to the portable variant below.
#endif

// Portable full unsigned multiplication 64 x 64 -> 128.
uint64_t xl = x & 0xffffffff;
uint64_t xh = x >> 32;
uint64_t yl = y & 0xffffffff;
Expand All @@ -389,21 +402,6 @@ inline constexpr uint128 constexpr_umul(uint64_t x, uint64_t y) noexcept
return {hi, lo};
}

/// Multiplies two 64-bit unsigned integers and returns the complete 128-bit product.
///
/// The implementation is chosen by the preprocessor: the builtin_uint128 type when
/// INTX_HAS_BUILTIN_INT128 is set, the _umul128 intrinsic when building with MSVC,
/// and constexpr_umul() in every other configuration.
///
/// @param x  First 64-bit factor.
/// @param y  Second 64-bit factor.
/// @return   The product, constructed as {high word, low word}.
inline uint128 umul(uint64_t x, uint64_t y) noexcept
{
#if INTX_HAS_BUILTIN_INT128
    const auto product = builtin_uint128{x} * builtin_uint128{y};
    const auto high = uint64_t(product >> 64);
    const auto low = uint64_t(product);
    return {high, low};
#elif defined(_MSC_VER)
    // The intrinsic returns the low word and stores the high word through the pointer.
    unsigned __int64 high;
    const auto low = _umul128(x, y, &high);
    return {high, low};
#else
    return constexpr_umul(x, y);
#endif
}

inline uint128 operator*(uint128 x, uint128 y) noexcept
{
auto p = umul(x.lo, y.lo);
Expand All @@ -413,7 +411,7 @@ inline uint128 operator*(uint128 x, uint128 y) noexcept

/// Constexpr multiplication of two uint128 values returning a uint128.
///
/// The low words are multiplied into a full product `p`, then the two cross products
/// (x.lo * y.hi and x.hi * y.lo) are added to the high word of `p`.
/// The x.hi * y.hi term is never computed.
inline constexpr uint128 constexpr_mul(uint128 x, uint128 y) noexcept
{
// NOTE(review): the next two lines both declare `p`. They look like the removed and
// added sides of one diff line, so only one of them can exist in the real header.
auto p = constexpr_umul(x.lo, y.lo);
auto p = umul(x.lo, y.lo);
// Fold the cross products into the high word only; the low word stays as computed.
p.hi += (x.lo * y.hi) + (x.hi * y.lo);
return {p.hi, p.lo};
}
Expand Down
22 changes: 2 additions & 20 deletions include/intx/intx.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -466,7 +466,7 @@ inline constexpr uint<N>& operator-=(uint<N>& x, const T& y) noexcept


template <unsigned N>
inline uint<2 * N> umul(const uint<N>& x, const uint<N>& y) noexcept
inline constexpr uint<2 * N> umul(const uint<N>& x, const uint<N>& y) noexcept
{
const auto t0 = umul(x.lo, y.lo);
const auto t1 = umul(x.hi, y.lo);
Expand All @@ -482,24 +482,6 @@ inline uint<2 * N> umul(const uint<N>& x, const uint<N>& y) noexcept
return {hi, lo};
}

/// Constexpr full multiplication of two uint<N> values producing a uint<2 * N>.
///
/// Each operand is split into its .lo and .hi halves. The four partial products are
/// formed with constexpr_umul() on the halves and then combined into the low and high
/// halves of the result.
///
/// @param x  First factor.
/// @param y  Second factor.
/// @return   The product, constructed as {high half, low half}.
template <unsigned N>
inline constexpr uint<2 * N> constexpr_umul(const uint<N>& x, const uint<N>& y) noexcept
{
    const auto half_width = num_bits(x) / 2;

    // Partial products of the operand halves.
    const auto lo_lo = constexpr_umul(x.lo, y.lo);
    const auto hi_lo = constexpr_umul(x.hi, y.lo);
    const auto lo_hi = constexpr_umul(x.lo, y.hi);
    const auto hi_hi = constexpr_umul(x.hi, y.hi);

    // Chain the two cross products: the first absorbs the high half of lo_lo,
    // the second absorbs the low half of that sum.
    const auto cross_a = hi_lo + lo_lo.hi;
    const auto cross_b = lo_hi + cross_a.lo;

    // The low result half is the shifted second sum joined with the low half of lo_lo;
    // the high halves of both sums are added on top of hi_hi.
    const auto low = (cross_b << half_width) | lo_lo.lo;
    const auto high = hi_hi + cross_b.hi + cross_a.hi;

    return {high, low};
}


template <unsigned N>
inline uint<N> sqr(const uint<N>& a) noexcept
{
Expand All @@ -515,7 +497,7 @@ inline uint<N> sqr(const uint<N>& a) noexcept
/// Constexpr multiplication of two uint<N> values returning a uint<N>.
///
/// The low halves are multiplied into a full product `t`. The two cross products are
/// computed with constexpr_mul() on the halves and added to the high half of `t`.
/// The a.hi * b.hi term is never computed.
template <unsigned N>
inline constexpr uint<N> constexpr_mul(const uint<N>& a, const uint<N>& b) noexcept
{
// NOTE(review): the next two lines both declare `t`. They look like the removed and
// added sides of one diff line, so only one of them can exist in the real header.
auto t = constexpr_umul(a.lo, b.lo);
auto t = umul(a.lo, b.lo);
// High half of the result: both cross products plus the high half of the low-halves product.
auto hi = constexpr_mul(a.lo, b.hi) + constexpr_mul(a.hi, b.lo) + t.hi;
return {hi, t.lo};
}
Expand Down
1 change: 0 additions & 1 deletion test/benchmarks/bench_int128.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -75,5 +75,4 @@ static void umul128(benchmark::State& state)
benchmark::DoNotOptimize(ahi);
}
}
BENCHMARK_TEMPLATE(umul128, intx::uint128, intx::constexpr_umul);
BENCHMARK_TEMPLATE(umul128, intx::uint128, intx::umul);
15 changes: 14 additions & 1 deletion test/unittests/test_int128.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -411,13 +411,26 @@ TEST(int128, umul)
constexpr uint64_t inputs[] = {12243, 12503, 53501, 62950, 682017770, 1164206252, 1693374163,
2079516117, 7043980147839196358, 12005172997151200154u, 15099684930315651455u,
17254606825257761760u};
constexpr uint128 outputs[] = {
intx::umul(inputs[0], inputs[1]),
intx::umul(inputs[1], inputs[2]),
intx::umul(inputs[2], inputs[3]),
intx::umul(inputs[3], inputs[4]),
intx::umul(inputs[4], inputs[5]),
intx::umul(inputs[5], inputs[6]),
intx::umul(inputs[6], inputs[7]),
intx::umul(inputs[7], inputs[8]),
intx::umul(inputs[8], inputs[9]),
intx::umul(inputs[9], inputs[10]),
intx::umul(inputs[10], inputs[11]),
};

for (size_t i = 1; i < (sizeof(inputs) / sizeof(inputs[0])); ++i)
{
const auto x = inputs[i - 1];
const auto y = inputs[i];

const auto generic = intx::constexpr_umul(x, y);
const auto generic = outputs[i - 1];
const auto best = intx::umul(x, y);

EXPECT_EQ(generic, best) << x << " x " << y;
Expand Down

0 comments on commit 6c39eb7

Please sign in to comment.