Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Specialized double-SHA256 with 64 byte inputs with SSE4.1 and AVX2 #13191

Merged
merged 7 commits into from Jun 4, 2018
Copy path View file
@@ -304,6 +304,8 @@ fi
# be compiled with them, rather that specific objects/libs may use them after checking for runtime
# compatibility.
AX_CHECK_COMPILE_FLAG([-msse4.2],[[SSE42_CXXFLAGS="-msse4.2"]],,[[$CXXFLAG_WERROR]])
AX_CHECK_COMPILE_FLAG([-msse4.1],[[SSE41_CXXFLAGS="-msse4.1"]],,[[$CXXFLAG_WERROR]])
AX_CHECK_COMPILE_FLAG([-mavx -mavx2],[[AVX2_CXXFLAGS="-mavx -mavx2"]],,[[$CXXFLAG_WERROR]])

TEMP_CXXFLAGS="$CXXFLAGS"
CXXFLAGS="$CXXFLAGS $SSE42_CXXFLAGS"
@@ -327,6 +329,44 @@ AC_COMPILE_IFELSE([AC_LANG_PROGRAM([[
)
CXXFLAGS="$TEMP_CXXFLAGS"

TEMP_CXXFLAGS="$CXXFLAGS"
CXXFLAGS="$CXXFLAGS $SSE41_CXXFLAGS"
AC_MSG_CHECKING(for SSE4.1 intrinsics)
AC_COMPILE_IFELSE([AC_LANG_PROGRAM([[
#include <stdint.h>
#if defined(_MSC_VER)
#include <immintrin.h>
#elif defined(__GNUC__)
#include <x86intrin.h>
#endif
]],[[
__m128i l = _mm_set1_epi32(0);
return _mm_extract_epi32(l, 3);
]])],
[ AC_MSG_RESULT(yes); enable_sse41=yes; AC_DEFINE(ENABLE_SSE41, 1, [Define this symbol to build code that uses SSE4.1 intrinsics]) ],
[ AC_MSG_RESULT(no)]
)
CXXFLAGS="$TEMP_CXXFLAGS"

TEMP_CXXFLAGS="$CXXFLAGS"
CXXFLAGS="$CXXFLAGS $AVX2_CXXFLAGS"
AC_MSG_CHECKING(for AVX2 intrinsics)
AC_COMPILE_IFELSE([AC_LANG_PROGRAM([[
#include <stdint.h>
#if defined(_MSC_VER)
#include <immintrin.h>
#elif defined(__GNUC__) && defined(__AVX2__)
#include <x86intrin.h>
#endif
]],[[
__m256i l = _mm256_set1_epi32(0);
return _mm256_extract_epi32(l, 7);
]])],
[ AC_MSG_RESULT(yes); enable_avx2=yes; AC_DEFINE(ENABLE_AVX2, 1, [Define this symbol to build code that uses AVX2 intrinsics]) ],
[ AC_MSG_RESULT(no)]
)
CXXFLAGS="$TEMP_CXXFLAGS"

CPPFLAGS="$CPPFLAGS -DHAVE_BUILD_INFO -D__STDC_FORMAT_MACROS"

AC_ARG_WITH([utils],
@@ -1245,6 +1285,8 @@ AM_CONDITIONAL([USE_LCOV],[test x$use_lcov = xyes])
AM_CONDITIONAL([GLIBC_BACK_COMPAT],[test x$use_glibc_compat = xyes])
AM_CONDITIONAL([HARDEN],[test x$use_hardening = xyes])
AM_CONDITIONAL([ENABLE_HWCRC32],[test x$enable_hwcrc32 = xyes])
AM_CONDITIONAL([ENABLE_SSE41],[test x$enable_sse41 = xyes])
AM_CONDITIONAL([ENABLE_AVX2],[test x$enable_avx2 = xyes])
AM_CONDITIONAL([USE_ASM],[test x$use_asm = xyes])

AC_DEFINE(CLIENT_VERSION_MAJOR, _CLIENT_VERSION_MAJOR, [Major version])
@@ -1283,6 +1325,8 @@ AC_SUBST(PIE_FLAGS)
AC_SUBST(SANITIZER_CXXFLAGS)
AC_SUBST(SANITIZER_LDFLAGS)
AC_SUBST(SSE42_CXXFLAGS)
AC_SUBST(SSE41_CXXFLAGS)
AC_SUBST(AVX2_CXXFLAGS)
AC_SUBST(LIBTOOL_APP_LDFLAGS)
AC_SUBST(USE_UPNP)
AC_SUBST(USE_QRCODE)
Copy path View file
@@ -30,6 +30,8 @@ LIBBITCOIN_CONSENSUS=libbitcoin_consensus.a
LIBBITCOIN_CLI=libbitcoin_cli.a
LIBBITCOIN_UTIL=libbitcoin_util.a
LIBBITCOIN_CRYPTO=crypto/libbitcoin_crypto.a
LIBBITCOIN_CRYPTO_SSE41=crypto/libbitcoin_crypto_sse41.a
LIBBITCOIN_CRYPTO_AVX2=crypto/libbitcoin_crypto_avx2.a
LIBBITCOINQT=qt/libbitcoinqt.a
LIBSECP256K1=secp256k1/libsecp256k1.la

@@ -50,6 +52,8 @@ $(LIBSECP256K1): $(wildcard secp256k1/src/*) $(wildcard secp256k1/include/*)
# But to build the less dependent modules first, we manually select their order here:
EXTRA_LIBRARIES += \
$(LIBBITCOIN_CRYPTO) \
$(LIBBITCOIN_CRYPTO_SSE41) \
$(LIBBITCOIN_CRYPTO_AVX2) \
$(LIBBITCOIN_UTIL) \
$(LIBBITCOIN_COMMON) \
$(LIBBITCOIN_CONSENSUS) \
@@ -289,6 +293,22 @@ if USE_ASM
crypto_libbitcoin_crypto_a_SOURCES += crypto/sha256_sse4.cpp
endif

crypto_libbitcoin_crypto_sse41_a_CXXFLAGS = $(AM_CXXFLAGS) $(PIE_FLAGS)
crypto_libbitcoin_crypto_sse41_a_CPPFLAGS = $(AM_CPPFLAGS)
if ENABLE_SSE41
crypto_libbitcoin_crypto_sse41_a_CXXFLAGS += $(SSE41_CXXFLAGS)
crypto_libbitcoin_crypto_sse41_a_CPPFLAGS += -DENABLE_SSE41
endif
crypto_libbitcoin_crypto_sse41_a_SOURCES = crypto/sha256_sse41.cpp

crypto_libbitcoin_crypto_avx2_a_CXXFLAGS = $(AM_CXXFLAGS) $(PIE_FLAGS)
crypto_libbitcoin_crypto_avx2_a_CPPFLAGS = $(AM_CPPFLAGS)
if ENABLE_AVX2
crypto_libbitcoin_crypto_avx2_a_CXXFLAGS += $(AVX2_CXXFLAGS)
crypto_libbitcoin_crypto_avx2_a_CPPFLAGS += -DENABLE_AVX2
endif
crypto_libbitcoin_crypto_avx2_a_SOURCES = crypto/sha256_avx2.cpp

# consensus: shared between all executables that validate any consensus rules.
libbitcoin_consensus_a_CPPFLAGS = $(AM_CPPFLAGS) $(BITCOIN_INCLUDES)
libbitcoin_consensus_a_CXXFLAGS = $(AM_CXXFLAGS) $(PIE_FLAGS)
@@ -411,6 +431,8 @@ bitcoind_LDADD = \
$(LIBBITCOIN_ZMQ) \
$(LIBBITCOIN_CONSENSUS) \
$(LIBBITCOIN_CRYPTO) \
$(LIBBITCOIN_CRYPTO_SSE41) \
$(LIBBITCOIN_CRYPTO_AVX2) \
$(LIBLEVELDB) \
$(LIBLEVELDB_SSE42) \
$(LIBMEMENV) \
@@ -432,7 +454,9 @@ bitcoin_cli_LDADD = \
$(LIBBITCOIN_CLI) \
$(LIBUNIVALUE) \
$(LIBBITCOIN_UTIL) \
$(LIBBITCOIN_CRYPTO)
$(LIBBITCOIN_CRYPTO) \
$(LIBBITCOIN_CRYPTO_SSE41) \
$(LIBBITCOIN_CRYPTO_AVX2)

bitcoin_cli_LDADD += $(BOOST_LIBS) $(SSL_LIBS) $(CRYPTO_LIBS) $(EVENT_LIBS)
#
@@ -453,6 +477,8 @@ bitcoin_tx_LDADD = \
$(LIBBITCOIN_UTIL) \
$(LIBBITCOIN_CONSENSUS) \
$(LIBBITCOIN_CRYPTO) \
$(LIBBITCOIN_CRYPTO_SSE41) \
$(LIBBITCOIN_CRYPTO_AVX2) \
$(LIBSECP256K1)

bitcoin_tx_LDADD += $(BOOST_LIBS) $(CRYPTO_LIBS)
Copy path View file
@@ -21,6 +21,7 @@ bench_bench_bitcoin_SOURCES = \
bench/rollingbloom.cpp \
bench/crypto_hash.cpp \
bench/ccoins_caching.cpp \
bench/merkle_root.cpp \
bench/mempool_eviction.cpp \
bench/verify_script.cpp \
bench/base58.cpp \
@@ -38,6 +39,8 @@ bench_bench_bitcoin_LDADD = \
$(LIBBITCOIN_UTIL) \
$(LIBBITCOIN_CONSENSUS) \
$(LIBBITCOIN_CRYPTO) \
$(LIBBITCOIN_CRYPTO_SSE41) \
$(LIBBITCOIN_CRYPTO_AVX2) \
$(LIBLEVELDB) \
$(LIBLEVELDB_SSE42) \
$(LIBMEMENV) \
Copy path View file
@@ -407,7 +407,7 @@ endif
if ENABLE_ZMQ
qt_bitcoin_qt_LDADD += $(LIBBITCOIN_ZMQ) $(ZMQ_LIBS)
endif
qt_bitcoin_qt_LDADD += $(LIBBITCOIN_CLI) $(LIBBITCOIN_COMMON) $(LIBBITCOIN_UTIL) $(LIBBITCOIN_CONSENSUS) $(LIBBITCOIN_CRYPTO) $(LIBUNIVALUE) $(LIBLEVELDB) $(LIBLEVELDB_SSE42) $(LIBMEMENV) \
qt_bitcoin_qt_LDADD += $(LIBBITCOIN_CLI) $(LIBBITCOIN_COMMON) $(LIBBITCOIN_UTIL) $(LIBBITCOIN_CONSENSUS) $(LIBBITCOIN_CRYPTO) $(LIBBITCOIN_CRYPTO_SSE41) $(LIBBITCOIN_CRYPTO_AVX2) $(LIBUNIVALUE) $(LIBLEVELDB) $(LIBLEVELDB_SSE42) $(LIBMEMENV) \
$(BOOST_LIBS) $(QT_LIBS) $(QT_DBUS_LIBS) $(QR_LIBS) $(PROTOBUF_LIBS) $(BDB_LIBS) $(SSL_LIBS) $(CRYPTO_LIBS) $(MINIUPNPC_LIBS) $(LIBSECP256K1) \
$(EVENT_PTHREADS_LIBS) $(EVENT_LIBS)
qt_bitcoin_qt_LDFLAGS = $(RELDFLAGS) $(AM_LDFLAGS) $(QT_LDFLAGS) $(LIBTOOL_APP_LDFLAGS)
Copy path View file
@@ -62,7 +62,7 @@ endif
if ENABLE_ZMQ
qt_test_test_bitcoin_qt_LDADD += $(LIBBITCOIN_ZMQ) $(ZMQ_LIBS)
endif
qt_test_test_bitcoin_qt_LDADD += $(LIBBITCOIN_CLI) $(LIBBITCOIN_COMMON) $(LIBBITCOIN_UTIL) $(LIBBITCOIN_CONSENSUS) $(LIBBITCOIN_CRYPTO) $(LIBUNIVALUE) $(LIBLEVELDB) \
qt_test_test_bitcoin_qt_LDADD += $(LIBBITCOIN_CLI) $(LIBBITCOIN_COMMON) $(LIBBITCOIN_UTIL) $(LIBBITCOIN_CONSENSUS) $(LIBBITCOIN_CRYPTO) $(LIBBITCOIN_CRYPTO_SSE41) $(LIBBITCOIN_CRYPTO_AVX2) $(LIBUNIVALUE) $(LIBLEVELDB) \
$(LIBLEVELDB_SSE42) $(LIBMEMENV) $(BOOST_LIBS) $(QT_DBUS_LIBS) $(QT_TEST_LIBS) $(QT_LIBS) \
$(QR_LIBS) $(PROTOBUF_LIBS) $(BDB_LIBS) $(SSL_LIBS) $(CRYPTO_LIBS) $(MINIUPNPC_LIBS) $(LIBSECP256K1) \
$(EVENT_PTHREADS_LIBS) $(EVENT_LIBS)
Copy path View file
@@ -108,7 +108,8 @@ test_test_bitcoin_LDADD =
if ENABLE_WALLET
test_test_bitcoin_LDADD += $(LIBBITCOIN_WALLET)
endif
test_test_bitcoin_LDADD += $(LIBBITCOIN_SERVER) $(LIBBITCOIN_CLI) $(LIBBITCOIN_COMMON) $(LIBBITCOIN_UTIL) $(LIBBITCOIN_CONSENSUS) $(LIBBITCOIN_CRYPTO) $(LIBUNIVALUE) \

test_test_bitcoin_LDADD += $(LIBBITCOIN_SERVER) $(LIBBITCOIN_CLI) $(LIBBITCOIN_COMMON) $(LIBBITCOIN_UTIL) $(LIBBITCOIN_CONSENSUS) $(LIBBITCOIN_CRYPTO) $(LIBBITCOIN_CRYPTO_SSE41) $(LIBBITCOIN_CRYPTO_AVX2) $(LIBUNIVALUE) \
$(LIBLEVELDB) $(LIBLEVELDB_SSE42) $(LIBMEMENV) $(BOOST_LIBS) $(BOOST_UNIT_TEST_FRAMEWORK_LIB) $(LIBSECP256K1) $(EVENT_LIBS) $(EVENT_PTHREADS_LIBS)
test_test_bitcoin_CXXFLAGS = $(AM_CXXFLAGS) $(PIE_FLAGS)

@@ -133,6 +134,8 @@ test_test_bitcoin_fuzzy_LDADD = \
$(LIBBITCOIN_UTIL) \
$(LIBBITCOIN_CONSENSUS) \
$(LIBBITCOIN_CRYPTO) \
$(LIBBITCOIN_CRYPTO_SSE41) \
$(LIBBITCOIN_CRYPTO_AVX2) \
$(LIBSECP256K1)

test_test_bitcoin_fuzzy_LDADD += $(BOOST_LIBS) $(CRYPTO_LIBS)
Copy path View file
@@ -52,6 +52,14 @@ static void SHA256_32b(benchmark::State& state)
}
}

static void SHA256D64_1024(benchmark::State& state)
{
std::vector<uint8_t> in(64 * 1024, 0);
while (state.KeepRunning()) {
SHA256D64(in.data(), in.data(), 1024);
}
}

static void SHA512(benchmark::State& state)
{
uint8_t hash[CSHA512::OUTPUT_SIZE];
@@ -94,5 +102,6 @@ BENCHMARK(SHA512, 330);

BENCHMARK(SHA256_32b, 4700 * 1000);
BENCHMARK(SipHash_32b, 40 * 1000 * 1000);
BENCHMARK(SHA256D64_1024, 7400);
BENCHMARK(FastRandom_32bit, 110 * 1000 * 1000);
BENCHMARK(FastRandom_1bit, 440 * 1000 * 1000);
Copy path View file
@@ -0,0 +1,26 @@
// Copyright (c) 2016 The Bitcoin Core developers

This comment has been minimized.

Copy link
@Empact
// Distributed under the MIT software license, see the accompanying
// file COPYING or http://www.opensource.org/licenses/mit-license.php.

#include "bench.h"

This comment has been minimized.

Copy link
@Empact

Empact May 30, 2018

Member

#include <


#include "uint256.h"
#include "random.h"
#include "consensus/merkle.h"

static void MerkleRoot(benchmark::State& state)
{
FastRandomContext rng(true);
std::vector<uint256> leaves;
leaves.resize(9001);
for (auto& item : leaves) {
item = rng.rand256();
}
while (state.KeepRunning()) {
bool mutation = false;
uint256 hash = ComputeMerkleRoot(std::vector<uint256>(leaves), &mutation);
leaves[mutation] = hash;
}
}

BENCHMARK(MerkleRoot, 800);
Oops, something went wrong.
ProTip! Use n and p to navigate between commits in a pull request.
You can’t perform that action at this time.