Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

add avx512 intrinsic #13989

Open
wants to merge 2 commits into
base: master
from
Open
Changes from all commits
Commits
File filter...
Filter file types
Jump to…
Jump to file or symbol
Failed to load files and symbols.
+417 −4
Diff settings

Always

Just for now

@@ -321,6 +321,7 @@ fi
AX_CHECK_COMPILE_FLAG([-msse4.2],[[SSE42_CXXFLAGS="-msse4.2"]],,[[$CXXFLAG_WERROR]])
AX_CHECK_COMPILE_FLAG([-msse4.1],[[SSE41_CXXFLAGS="-msse4.1"]],,[[$CXXFLAG_WERROR]])
AX_CHECK_COMPILE_FLAG([-mavx -mavx2],[[AVX2_CXXFLAGS="-mavx -mavx2"]],,[[$CXXFLAG_WERROR]])
AX_CHECK_COMPILE_FLAG([-mavx -mavx2 -mavx512f],[[AVX512_CXXFLAGS="-mavx -mavx2 -mavx512f"]],,[[$CXXFLAG_WERROR]])
AX_CHECK_COMPILE_FLAG([-msse4 -msha],[[SHANI_CXXFLAGS="-msse4 -msha"]],,[[$CXXFLAG_WERROR]])

TEMP_CXXFLAGS="$CXXFLAGS"
@@ -375,6 +376,27 @@ AC_COMPILE_IFELSE([AC_LANG_PROGRAM([[
)
CXXFLAGS="$TEMP_CXXFLAGS"

case $host in
*mingw*)

This comment has been minimized.

Copy link
@luke-jr

luke-jr Aug 28, 2018

Member

Why is this disallowed on mingw? Add a comment...

This comment has been minimized.

This comment has been minimized.

Copy link
@luke-jr

luke-jr Aug 28, 2018

Member

Shouldn't your below test cleanly fail in that scenario?

This comment has been minimized.

Copy link
@fingera

fingera Aug 28, 2018

Author Contributor

Most servers run Linux and support AVX512?

This comment has been minimized.

Copy link
@luke-jr

luke-jr Aug 28, 2018

Member

That's irrelevant...

This comment has been minimized.

Copy link
@fingera

fingera Aug 28, 2018

Author Contributor

What should I do? Update the CI's mingw version?

This comment has been minimized.

Copy link
@luke-jr

luke-jr Aug 28, 2018

Member

You shouldn't need to.

;;
*)
TEMP_CXXFLAGS="$CXXFLAGS"
CXXFLAGS="$CXXFLAGS $AVX512_CXXFLAGS"
AC_MSG_CHECKING(for AVX512 intrinsics)
AC_COMPILE_IFELSE([AC_LANG_PROGRAM([[

This comment has been minimized.

Copy link
@luke-jr

luke-jr Aug 28, 2018

Member

Use AC_LINK_IFELSE here instead

This comment has been minimized.

Copy link
@fingera

fingera Aug 28, 2018

Author Contributor

It's not a link error: mingw generates SEH directives in the assembly for registers above xmm16, but the assembler can't handle those directives for such registers.

.seh_savexmm	%xmm20, 256

Another solution:
make the AC_LANG_PROGRAM body longer than 50 lines, like this:
fingera@585a8c8

This comment has been minimized.

Copy link
@fingera

fingera Aug 31, 2018

Author Contributor

Is it OK to add a little code that makes mingw use xmm registers numbered above 16? @luke-jr

#include <stdint.h>
#include <immintrin.h>
]],[[
__m512i l = _mm512_set1_epi32(0);
return _mm_extract_epi32(_mm512_extracti32x4_epi32(l, 3), 3);
]])],
[ AC_MSG_RESULT(yes); enable_avx512=yes; AC_DEFINE(ENABLE_AVX512, 1, [Define this symbol to build code that uses AVX512 intrinsics]) ],
[ AC_MSG_RESULT(no)]
)
CXXFLAGS="$TEMP_CXXFLAGS"
;;
esac

TEMP_CXXFLAGS="$CXXFLAGS"
CXXFLAGS="$CXXFLAGS $SHANI_CXXFLAGS"
AC_MSG_CHECKING(for SHA-NI intrinsics)
@@ -1335,6 +1357,7 @@ AM_CONDITIONAL([HARDEN],[test x$use_hardening = xyes])
AM_CONDITIONAL([ENABLE_HWCRC32],[test x$enable_hwcrc32 = xyes])
AM_CONDITIONAL([ENABLE_SSE41],[test x$enable_sse41 = xyes])
AM_CONDITIONAL([ENABLE_AVX2],[test x$enable_avx2 = xyes])
AM_CONDITIONAL([ENABLE_AVX512],[test x$enable_avx512 = xyes])
AM_CONDITIONAL([ENABLE_SHANI],[test x$enable_shani = xyes])
AM_CONDITIONAL([USE_ASM],[test x$use_asm = xyes])

@@ -1381,6 +1404,7 @@ AC_SUBST(SANITIZER_LDFLAGS)
AC_SUBST(SSE42_CXXFLAGS)
AC_SUBST(SSE41_CXXFLAGS)
AC_SUBST(AVX2_CXXFLAGS)
AC_SUBST(AVX512_CXXFLAGS)
AC_SUBST(SHANI_CXXFLAGS)
AC_SUBST(LIBTOOL_APP_LDFLAGS)
AC_SUBST(USE_UPNP)
@@ -52,6 +52,10 @@ if ENABLE_AVX2
LIBBITCOIN_CRYPTO_AVX2 = crypto/libbitcoin_crypto_avx2.a
LIBBITCOIN_CRYPTO += $(LIBBITCOIN_CRYPTO_AVX2)
endif
if ENABLE_AVX512
LIBBITCOIN_CRYPTO_AVX512 = crypto/libbitcoin_crypto_avx512.a
LIBBITCOIN_CRYPTO += $(LIBBITCOIN_CRYPTO_AVX512)
endif
if ENABLE_SHANI
LIBBITCOIN_CRYPTO_SHANI = crypto/libbitcoin_crypto_shani.a
LIBBITCOIN_CRYPTO += $(LIBBITCOIN_CRYPTO_SHANI)
@@ -327,6 +331,12 @@ crypto_libbitcoin_crypto_avx2_a_CXXFLAGS += $(AVX2_CXXFLAGS)
crypto_libbitcoin_crypto_avx2_a_CPPFLAGS += -DENABLE_AVX2
crypto_libbitcoin_crypto_avx2_a_SOURCES = crypto/sha256_avx2.cpp

crypto_libbitcoin_crypto_avx512_a_CXXFLAGS = $(AM_CXXFLAGS) $(PIE_FLAGS)
crypto_libbitcoin_crypto_avx512_a_CPPFLAGS = $(AM_CPPFLAGS)
crypto_libbitcoin_crypto_avx512_a_CXXFLAGS += $(AVX512_CXXFLAGS)
crypto_libbitcoin_crypto_avx512_a_CPPFLAGS += -DENABLE_AVX512
crypto_libbitcoin_crypto_avx512_a_SOURCES = crypto/sha256_avx512.cpp

crypto_libbitcoin_crypto_shani_a_CXXFLAGS = $(AM_CXXFLAGS) $(PIE_FLAGS)
crypto_libbitcoin_crypto_shani_a_CPPFLAGS = $(AM_CPPFLAGS)
crypto_libbitcoin_crypto_shani_a_CXXFLAGS += $(SHANI_CXXFLAGS)
@@ -139,6 +139,7 @@ test_test_bitcoin_fuzzy_LDADD = \
$(LIBBITCOIN_CRYPTO) \
$(LIBBITCOIN_CRYPTO_SSE41) \
$(LIBBITCOIN_CRYPTO_AVX2) \
$(LIBBITCOIN_CRYPTO_AVX512) \
$(LIBBITCOIN_CRYPTO_SHANI) \
$(LIBSECP256K1)

@@ -29,6 +29,11 @@ namespace sha256d64_avx2
void Transform_8way(unsigned char* out, const unsigned char* in);
}

// Forward declaration of the AVX512 16-way transform.
//
// SHA256AutoDetect() installs it as TransformD64_16way when ENABLE_AVX512 is
// defined, BUILD_BITCOIN_INTERNAL is not, and both have_avx512 and
// enabled_avx512 are set. SHA256D64() then calls it once per group of 16
// blocks, advancing `in` by 1024 bytes and `out` by 512 bytes per call.
//
// NOTE(review): presumably defined in crypto/sha256_avx512.cpp, the only
// source listed for libbitcoin_crypto_avx512.a — confirm.
namespace sha256d64_avx512
{
void Transform_16way(unsigned char* out, const unsigned char* in);
}

namespace sha256d64_shani
{
void Transform_2way(unsigned char* out, const unsigned char* in);
@@ -461,6 +466,7 @@ TransformD64Type TransformD64 = sha256::TransformD64;
TransformD64Type TransformD64_2way = nullptr;
TransformD64Type TransformD64_4way = nullptr;
TransformD64Type TransformD64_8way = nullptr;
TransformD64Type TransformD64_16way = nullptr;

bool SelfTest() {
// Input state (equal to the initial SHA256 state)
@@ -560,11 +566,11 @@ void inline cpuid(uint32_t leaf, uint32_t subleaf, uint32_t& a, uint32_t& b, uin
}

/**
 * Query the OS-enabled register state by executing `xgetbv` with ECX = 0.
 *
 * NOTE(review): this hunk shows two signatures and two return statements
 * back to back; it looks like the pre-change and post-change lines of a diff
 * whose +/- markers were lost — confirm against the pull request.
 *  - `bool AVXEnabled()` with `return (a & 6) == 6;` yields true only when
 *    bits 1 and 2 of EAX are both set.
 *  - `uint32_t AVXEnabledFlags()` with `return a;` hands the raw EAX value to
 *    the caller, which in SHA256AutoDetect() tests `(flags & 6) == 6` for AVX
 *    and `(flags & 0xe6) == 0xe6` for AVX512.
 */
bool AVXEnabled()
uint32_t AVXEnabledFlags()
{
// `a` receives EAX and `d` receives EDX; `d` is captured but never read here.
uint32_t a, d;
__asm__("xgetbv" : "=a"(a), "=d"(d) : "c"(0));
return (a & 6) == 6;
return a;
}
#endif
} // namespace
@@ -578,29 +584,36 @@ std::string SHA256AutoDetect()
bool have_xsave = false;
bool have_avx = false;
bool have_avx2 = false;
bool have_avx512 = false;
bool have_shani = false;
bool enabled_avx = false;
bool enabled_avx512 = false;

(void)AVXEnabled;
(void)AVXEnabledFlags;
(void)have_sse4;
(void)have_avx;
(void)have_xsave;
(void)have_avx2;
(void)have_avx512;
(void)have_shani;
(void)enabled_avx;
(void)enabled_avx512;

uint32_t eax, ebx, ecx, edx;
cpuid(1, 0, eax, ebx, ecx, edx);
have_sse4 = (ecx >> 19) & 1;
have_xsave = (ecx >> 27) & 1;
have_avx = (ecx >> 28) & 1;
if (have_xsave && have_avx) {
enabled_avx = AVXEnabled();
uint32_t flags = AVXEnabledFlags();
enabled_avx = (flags & 6) == 6;
enabled_avx512 = (flags & 0xe6) == 0xe6;
}
if (have_sse4) {
cpuid(7, 0, eax, ebx, ecx, edx);
have_avx2 = (ebx >> 5) & 1;
have_shani = (ebx >> 29) & 1;
have_avx512 = (ebx >> 16) & 1;
}

#if defined(ENABLE_SHANI) && !defined(BUILD_BITCOIN_INTERNAL)
@@ -632,6 +645,13 @@ std::string SHA256AutoDetect()
ret += ",avx2(8way)";
}
#endif

#if defined(ENABLE_AVX512) && !defined(BUILD_BITCOIN_INTERNAL)
if (have_avx512 && enabled_avx512) {
TransformD64_16way = sha256d64_avx512::Transform_16way;
ret += ",avx512(16way)";
}
#endif
#endif

assert(SelfTest());
@@ -697,6 +717,14 @@ CSHA256& CSHA256::Reset()

void SHA256D64(unsigned char* out, const unsigned char* in, size_t blocks)
{
if (TransformD64_16way) {
while (blocks >= 16) {
TransformD64_16way(out, in);
out += 512;
in += 1024;
blocks -= 16;
}
}
if (TransformD64_8way) {
while (blocks >= 8) {
TransformD64_8way(out, in);
Oops, something went wrong.
ProTip! Use n and p to navigate between commits in a pull request.
You can’t perform that action at this time.