Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

SHA256 implementations based on Intel SHA Extensions #13386

Merged
merged 3 commits into from Jul 9, 2018
Merged
Changes from all commits
Commits
File filter...
Filter file types
Jump to…
Jump to file or symbol
Failed to load files and symbols.
+464 −32
Diff settings

Always

Just for now

Copy path View file
@@ -319,6 +319,7 @@ fi
AX_CHECK_COMPILE_FLAG([-msse4.2],[[SSE42_CXXFLAGS="-msse4.2"]],,[[$CXXFLAG_WERROR]])
AX_CHECK_COMPILE_FLAG([-msse4.1],[[SSE41_CXXFLAGS="-msse4.1"]],,[[$CXXFLAG_WERROR]])
AX_CHECK_COMPILE_FLAG([-mavx -mavx2],[[AVX2_CXXFLAGS="-mavx -mavx2"]],,[[$CXXFLAG_WERROR]])
AX_CHECK_COMPILE_FLAG([-msse4 -msha],[[SHANI_CXXFLAGS="-msse4 -msha"]],,[[$CXXFLAG_WERROR]])

TEMP_CXXFLAGS="$CXXFLAGS"
CXXFLAGS="$CXXFLAGS $SSE42_CXXFLAGS"
@@ -347,11 +348,7 @@ CXXFLAGS="$CXXFLAGS $SSE41_CXXFLAGS"
AC_MSG_CHECKING(for SSE4.1 intrinsics)
AC_COMPILE_IFELSE([AC_LANG_PROGRAM([[
#include <stdint.h>
#if defined(_MSC_VER)
#include <immintrin.h>
#elif defined(__GNUC__)
#include <x86intrin.h>
#endif
]],[[
__m128i l = _mm_set1_epi32(0);
return _mm_extract_epi32(l, 3);
@@ -366,11 +363,7 @@ CXXFLAGS="$CXXFLAGS $AVX2_CXXFLAGS"
AC_MSG_CHECKING(for AVX2 intrinsics)
AC_COMPILE_IFELSE([AC_LANG_PROGRAM([[
#include <stdint.h>
#if defined(_MSC_VER)
#include <immintrin.h>
#elif defined(__GNUC__) && defined(__AVX2__)
#include <x86intrin.h>
#endif
]],[[
__m256i l = _mm256_set1_epi32(0);
return _mm256_extract_epi32(l, 7);
@@ -380,6 +373,23 @@ AC_COMPILE_IFELSE([AC_LANG_PROGRAM([[
)
CXXFLAGS="$TEMP_CXXFLAGS"

TEMP_CXXFLAGS="$CXXFLAGS"
CXXFLAGS="$CXXFLAGS $SHANI_CXXFLAGS"
AC_MSG_CHECKING(for SHA-NI intrinsics)
AC_COMPILE_IFELSE([AC_LANG_PROGRAM([[
#include <stdint.h>
#include <immintrin.h>
]],[[
__m128i i = _mm_set1_epi32(0);
__m128i j = _mm_set1_epi32(1);
__m128i k = _mm_set1_epi32(2);
return _mm_extract_epi32(_mm_sha256rnds2_epu32(i, i, k), 0);
]])],
[ AC_MSG_RESULT(yes); enable_shani=yes; AC_DEFINE(ENABLE_SHANI, 1, [Define this symbol to build code that uses SHA-NI intrinsics]) ],
[ AC_MSG_RESULT(no)]
)
CXXFLAGS="$TEMP_CXXFLAGS"

CPPFLAGS="$CPPFLAGS -DHAVE_BUILD_INFO -D__STDC_FORMAT_MACROS"

AC_ARG_WITH([utils],
@@ -1300,6 +1310,7 @@ AM_CONDITIONAL([HARDEN],[test x$use_hardening = xyes])
AM_CONDITIONAL([ENABLE_HWCRC32],[test x$enable_hwcrc32 = xyes])
AM_CONDITIONAL([ENABLE_SSE41],[test x$enable_sse41 = xyes])
AM_CONDITIONAL([ENABLE_AVX2],[test x$enable_avx2 = xyes])
AM_CONDITIONAL([ENABLE_SHANI],[test x$enable_shani = xyes])
AM_CONDITIONAL([USE_ASM],[test x$use_asm = xyes])

AC_DEFINE(CLIENT_VERSION_MAJOR, _CLIENT_VERSION_MAJOR, [Major version])
@@ -1344,6 +1355,7 @@ AC_SUBST(SANITIZER_LDFLAGS)
AC_SUBST(SSE42_CXXFLAGS)
AC_SUBST(SSE41_CXXFLAGS)
AC_SUBST(AVX2_CXXFLAGS)
AC_SUBST(SHANI_CXXFLAGS)
AC_SUBST(LIBTOOL_APP_LDFLAGS)
AC_SUBST(USE_UPNP)
AC_SUBST(USE_QRCODE)
Copy path View file
@@ -52,6 +52,10 @@ if ENABLE_AVX2
LIBBITCOIN_CRYPTO_AVX2 = crypto/libbitcoin_crypto_avx2.a
LIBBITCOIN_CRYPTO += $(LIBBITCOIN_CRYPTO_AVX2)
endif
if ENABLE_SHANI
LIBBITCOIN_CRYPTO_SHANI = crypto/libbitcoin_crypto_shani.a
LIBBITCOIN_CRYPTO += $(LIBBITCOIN_CRYPTO_SHANI)
endif

$(LIBSECP256K1): $(wildcard secp256k1/src/*) $(wildcard secp256k1/include/*)
$(AM_V_at)$(MAKE) $(AM_MAKEFLAGS) -C $(@D) $(@F)
@@ -314,6 +318,12 @@ crypto_libbitcoin_crypto_avx2_a_CXXFLAGS += $(AVX2_CXXFLAGS)
crypto_libbitcoin_crypto_avx2_a_CPPFLAGS += -DENABLE_AVX2
crypto_libbitcoin_crypto_avx2_a_SOURCES = crypto/sha256_avx2.cpp

crypto_libbitcoin_crypto_shani_a_CXXFLAGS = $(AM_CXXFLAGS) $(PIE_FLAGS)
crypto_libbitcoin_crypto_shani_a_CPPFLAGS = $(AM_CPPFLAGS)
crypto_libbitcoin_crypto_shani_a_CXXFLAGS += $(SHANI_CXXFLAGS)
crypto_libbitcoin_crypto_shani_a_CPPFLAGS += -DENABLE_SHANI
crypto_libbitcoin_crypto_shani_a_SOURCES = crypto/sha256_shani.cpp

# consensus: shared between all executables that validate any consensus rules.
libbitcoin_consensus_a_CPPFLAGS = $(AM_CPPFLAGS) $(BITCOIN_INCLUDES)
libbitcoin_consensus_a_CXXFLAGS = $(AM_CXXFLAGS) $(PIE_FLAGS)
Copy path View file
@@ -137,6 +137,7 @@ test_test_bitcoin_fuzzy_LDADD = \
$(LIBBITCOIN_CRYPTO) \
$(LIBBITCOIN_CRYPTO_SSE41) \
$(LIBBITCOIN_CRYPTO_AVX2) \
$(LIBBITCOIN_CRYPTO_SHANI) \
$(LIBSECP256K1)

test_test_bitcoin_fuzzy_LDADD += $(BOOST_LIBS) $(CRYPTO_LIBS)
Copy path View file
@@ -29,6 +29,16 @@ namespace sha256d64_avx2
void Transform_8way(unsigned char* out, const unsigned char* in);
}

namespace sha256d64_shani
{
void Transform_2way(unsigned char* out, const unsigned char* in);
}

namespace sha256_shani
{
void Transform(uint32_t* s, const unsigned char* chunk, size_t blocks);
}

// Internal implementation code.
namespace
{
@@ -448,6 +458,7 @@ void TransformD64Wrapper(unsigned char* out, const unsigned char* in)

TransformType Transform = sha256::Transform;
TransformD64Type TransformD64 = sha256::TransformD64;
TransformD64Type TransformD64_2way = nullptr;
TransformD64Type TransformD64_4way = nullptr;
TransformD64Type TransformD64_8way = nullptr;

@@ -512,6 +523,13 @@ bool SelfTest() {
TransformD64(out, data + 1);
if (!std::equal(out, out + 32, result_d64)) return false;

// Test TransformD64_2way, if available.
if (TransformD64_2way) {
unsigned char out[64];
TransformD64_2way(out, data + 1);
if (!std::equal(out, out + 64, result_d64)) return false;
}

// Test TransformD64_4way, if available.
if (TransformD64_4way) {
unsigned char out[128];
@@ -552,32 +570,64 @@ std::string SHA256AutoDetect()
{
std::string ret = "standard";
#if defined(USE_ASM) && (defined(__x86_64__) || defined(__amd64__) || defined(__i386__))
(void)AVXEnabled; // Silence unused warning (in case ENABLE_AVX2 is not defined)
bool have_sse4 = false;
bool have_xsave = false;
bool have_avx = false;
bool have_avx2 = false;
bool have_shani = false;
bool enabled_avx = false;

(void)AVXEnabled;
(void)have_sse4;
(void)have_avx;
(void)have_xsave;
(void)have_avx2;
(void)have_shani;
(void)enabled_avx;

uint32_t eax, ebx, ecx, edx;
cpuid(1, 0, eax, ebx, ecx, edx);
if ((ecx >> 19) & 1) {
have_sse4 = (ecx >> 19) & 1;
have_xsave = (ecx >> 27) & 1;
have_avx = (ecx >> 28) & 1;
if (have_xsave && have_avx) {
enabled_avx = AVXEnabled();
}
if (have_sse4) {
cpuid(7, 0, eax, ebx, ecx, edx);
have_avx2 = (ebx >> 5) & 1;
have_shani = (ebx >> 29) & 1;
}

#if defined(ENABLE_SHANI) && !defined(BUILD_BITCOIN_INTERNAL)
if (have_shani) {
Transform = sha256_shani::Transform;
TransformD64 = TransformD64Wrapper<sha256_shani::Transform>;
TransformD64_2way = sha256d64_shani::Transform_2way;
ret = "shani(1way,2way)";
have_sse4 = false; // Disable SSE4/AVX2;
have_avx2 = false;
}
#endif

if (have_sse4) {
#if defined(__x86_64__) || defined(__amd64__)
Transform = sha256_sse4::Transform;
TransformD64 = TransformD64Wrapper<sha256_sse4::Transform>;
ret = "sse4(1way)";
#endif
#if defined(ENABLE_SSE41) && !defined(BUILD_BITCOIN_INTERNAL)
TransformD64_4way = sha256d64_sse41::Transform_4way;
ret = "sse4(1way+4way)";
#if defined(ENABLE_AVX2) && !defined(BUILD_BITCOIN_INTERNAL)
if (((ecx >> 27) & 1) && ((ecx >> 28) & 1)) { // XSAVE and AVX
cpuid(7, 0, eax, ebx, ecx, edx);
if ((ebx >> 5) & 1) { // AVX2 flag
if (AVXEnabled()) { // OS has enabled AVX registers
TransformD64_8way = sha256d64_avx2::Transform_8way;
ret += ",avx2(8way)";
}
}
}
#endif
#else
ret = "sse4";
ret += ",sse41(4way)";
#endif
}

#if defined(ENABLE_AVX2) && !defined(BUILD_BITCOIN_INTERNAL)
if (have_avx2 && have_avx && enabled_avx) {
TransformD64_8way = sha256d64_avx2::Transform_8way;
ret += ",avx2(8way)";
}
#endif
#endif

assert(SelfTest());
@@ -659,6 +709,14 @@ void SHA256D64(unsigned char* out, const unsigned char* in, size_t blocks)
blocks -= 4;
}
}
if (TransformD64_2way) {
while (blocks >= 2) {
TransformD64_2way(out, in);
out += 64;
in += 128;
blocks -= 2;
}
}
while (blocks) {
TransformD64(out, in);
out += 32;
Copy path View file
@@ -1,11 +1,7 @@
#ifdef ENABLE_AVX2

#include <stdint.h>
#if defined(_MSC_VER)
#include <immintrin.h>
#elif defined(__GNUC__)
#include <x86intrin.h>
#endif

#include <crypto/sha256.h>
#include <crypto/common.h>
Oops, something went wrong.
ProTip! Use n and p to navigate between commits in a pull request.
You can’t perform that action at this time.