diff --git a/CMakeLists.txt b/CMakeLists.txt index 57e986c96..22af3c5cd 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,4 +1,5 @@ -cmake_minimum_required (VERSION 2.8.11) +cmake_minimum_required (VERSION 2.8.12) + project (vectorscan C CXX) set (HS_MAJOR_VERSION 5) @@ -165,27 +166,49 @@ if (CMAKE_COMPILER_IS_GNUCC AND NOT CROSS_COMPILE) # cpuid info and then chooses the best microarch it can (and replaces # the flag), so use that for tune. + set(TUNE_FLAG "mtune") + set(GNUCC_TUNE "") + message(STATUS "ARCH_FLAG '${ARCH_FLAG}' '${GNUCC_ARCH}', TUNE_FLAG '${TUNE_FLAG}' '${GNUCC_TUNE}' ") + # arg1 might exist if using ccache string (STRIP "${CMAKE_C_COMPILER_ARG1}" CC_ARG1) - set (EXEC_ARGS ${CC_ARG1} -c -Q --help=target -${ARCH_FLAG}=native -mtune=native) + set (EXEC_ARGS ${CC_ARG1} -c -Q --help=target -${ARCH_FLAG}=native -${TUNE_FLAG}=native) execute_process(COMMAND ${CMAKE_C_COMPILER} ${EXEC_ARGS} OUTPUT_VARIABLE _GCC_OUTPUT) - string(FIND "${_GCC_OUTPUT}" "${ARCH_FLAG}" POS) + set(_GCC_OUTPUT_TUNE ${_GCC_OUTPUT}) + string(FIND "${_GCC_OUTPUT}" "${ARCH_FLAG}=" POS) string(SUBSTRING "${_GCC_OUTPUT}" ${POS} -1 _GCC_OUTPUT) string(REGEX REPLACE "${ARCH_FLAG}=[ \t]*([^ \n]*)[ \n].*" "\\1" GNUCC_ARCH "${_GCC_OUTPUT}") + string(FIND "${_GCC_OUTPUT_TUNE}" "${TUNE_FLAG}=" POS_TUNE) + string(SUBSTRING "${_GCC_OUTPUT_TUNE}" ${POS_TUNE} -1 _GCC_OUTPUT_TUNE) + string(REGEX REPLACE "${TUNE_FLAG}=[ \t]*([^ \n]*)[ \n].*" "\\1" GNUCC_TUNE "${_GCC_OUTPUT_TUNE}") + + string(FIND "${GNUCC_ARCH}" "sve" POS_SVE) + string(FIND "${GNUCC_ARCH}" "sve2" POS_SVE2) + string(FIND "${GNUCC_ARCH}" "sve2-bitperm" POS_SVE2_BITPERM) + if (NOT POS_SVE EQUAL 0) + set(SVE_FOUND 1) + elseif(NOT POS_SVE2 EQUAL 0) + set(SVE2_FOUND 1) + elseif(NOT POS_SVE2_BITPERM EQUAL 0) + set(SVE2_BITPERM_FOUND 1) + endif() + + message(STATUS "ARCH_FLAG '${ARCH_FLAG}' '${GNUCC_ARCH}', TUNE_FLAG '${TUNE_FLAG}' '${GNUCC_TUNE}' ") + # test the parsed flag - set (EXEC_ARGS ${CC_ARG1} -E - -mtune=${GNUCC_ARCH}) + set (EXEC_ARGS ${CC_ARG1} -E - -${ARCH_FLAG}=${GNUCC_ARCH} -${TUNE_FLAG}=${GNUCC_TUNE}) execute_process(COMMAND ${CMAKE_C_COMPILER} ${EXEC_ARGS} OUTPUT_QUIET ERROR_QUIET INPUT_FILE /dev/null RESULT_VARIABLE GNUCC_TUNE_TEST) if (NOT GNUCC_TUNE_TEST EQUAL 0) - message(WARNING "Something went wrong determining gcc tune: -mtune=${GNUCC_ARCH} not valid, falling back to -mtune=native") - set(TUNE_FLAG native) + message(WARNING "Something went wrong determining gcc tune: -mtune=${GNUCC_TUNE} not valid, falling back to -mtune=native") + set(GNUCC_TUNE native) else() - set(TUNE_FLAG ${GNUCC_ARCH}) - message(STATUS "gcc will tune for ${GNUCC_ARCH}, ${TUNE_FLAG}") + set(GNUCC_TUNE ${GNUCC_TUNE}) + message(STATUS "gcc will tune for ${GNUCC_ARCH}, ${GNUCC_TUNE}") endif() elseif (CMAKE_COMPILER_IS_CLANG AND NOT CROSS_COMPILE) if (ARCH_IA32 OR ARCH_X86_64) @@ -226,22 +249,26 @@ if (ARCH_IA32 OR ARCH_X86_64) endif() if (ARCH_AARCH64) - if (BUILD_SVE2_BITPERM) + if (BUILD_SVE2_BITPERM AND NOT SVE2_BITPERM_FOUND) set(GNUCC_ARCH "${GNUCC_ARCH}+sve2-bitperm") - elseif (BUILD_SVE2) + elseif (BUILD_SVE2 AND NOT SVE2_FOUND) set(GNUCC_ARCH "${GNUCC_ARCH}+sve2") - elseif (BUILD_SVE) + elseif (BUILD_SVE AND NOT SVE_FOUND) set(GNUCC_ARCH "${GNUCC_ARCH}+sve") endif () endif(ARCH_AARCH64) - message(STATUS "ARCH_C_FLAGS : ${ARCH_C_FLAGS}") message(STATUS "ARCH_CXX_FLAGS : ${ARCH_CXX_FLAGS}") if (NOT FAT_RUNTIME) - set(ARCH_C_FLAGS "-${ARCH_FLAG}=${GNUCC_ARCH} -mtune=${TUNE_FLAG} ${ARCH_C_FLAGS}") - set(ARCH_CXX_FLAGS "-${ARCH_FLAG}=${GNUCC_ARCH} -mtune=${TUNE_FLAG} ${ARCH_CXX_FLAGS}") + if (GNUCC_TUNE) + set(ARCH_C_FLAGS "-${ARCH_FLAG}=${GNUCC_ARCH} -${TUNE_FLAG}=${GNUCC_TUNE}") + set(ARCH_CXX_FLAGS "-${ARCH_FLAG}=${GNUCC_ARCH} -${TUNE_FLAG}=${GNUCC_TUNE}") + else() + set(ARCH_C_FLAGS "-${ARCH_FLAG}=${GNUCC_ARCH} -mtune=${TUNE_FLAG} ${ARCH_C_FLAGS}") + set(ARCH_CXX_FLAGS "-${ARCH_FLAG}=${GNUCC_ARCH} -mtune=${TUNE_FLAG} ${ARCH_CXX_FLAGS}") + endif() endif() #if (ARCH_IA32 OR ARCH_X86_64 OR ARCH_ARM32 OR ARCH_AARCH64) @@ -296,6 +323,12 @@ if (NOT RELEASE_BUILD) # release builds set(EXTRA_C_FLAGS "${EXTRA_C_FLAGS} -Werror") set(EXTRA_CXX_FLAGS "${EXTRA_CXX_FLAGS} -Werror") + if (CMAKE_COMPILER_IS_CLANG) + if (CMAKE_C_COMPILER_VERSION VERSION_GREATER "13.0") + set(EXTRA_C_FLAGS "${EXTRA_C_FLAGS} -Wno-unused-but-set-variable") + set(EXTRA_CXX_FLAGS "${EXTRA_CXX_FLAGS} -Wno-unused-but-set-variable") + endif() + endif() endif() if (DISABLE_ASSERTS) @@ -332,6 +365,7 @@ if (ARCH_IA32 OR ARCH_X86_64) elseif (ARCH_ARM32 OR ARCH_AARCH64) CHECK_INCLUDE_FILE_CXX(arm_neon.h HAVE_C_ARM_NEON_H) if (BUILD_SVE OR BUILD_SVE2 OR BUILD_SVE2_BITPERM) + set(CMAKE_REQUIRED_FLAGS ${ARCH_CXX_FLAGS}) CHECK_INCLUDE_FILE_CXX(arm_sve.h HAVE_C_ARM_SVE_H) if (NOT HAVE_C_ARM_SVE_H) message(FATAL_ERROR "arm_sve.h is required to build for SVE.") diff --git a/src/nfa/mcclellancompile.cpp b/src/nfa/mcclellancompile.cpp index 055920b29..d1afcbcc6 100644 --- a/src/nfa/mcclellancompile.cpp +++ b/src/nfa/mcclellancompile.cpp @@ -1484,12 +1484,12 @@ bytecode_ptr mcclellanCompile_i(raw_dfa &raw, accel_dfa_build_strat &strat, find_wide_state(info); } - u16 total_daddy = 0; bool any_cyclic_near_anchored_state = is_cyclic_near(raw, raw.start_anchored); // Sherman optimization if (info.impl_alpha_size > 16) { + u16 total_daddy = 0; for (u32 i = 0; i < info.size(); i++) { if (info.is_widestate(i)) { continue; diff --git a/src/nfagraph/ng_misc_opt.cpp b/src/nfagraph/ng_misc_opt.cpp index d0f1f029a..2b898cf76 100644 --- a/src/nfagraph/ng_misc_opt.cpp +++ b/src/nfagraph/ng_misc_opt.cpp @@ -385,8 +385,7 @@ bool improveGraph(NGHolder &g, som_type som) { const vector ordering = getTopoOrdering(g); - return enlargeCyclicCR(g, som, ordering) - | enlargeCyclicCR_rev(g, ordering); + return enlargeCyclicCR(g, som, ordering) || enlargeCyclicCR_rev(g, ordering); } /** finds a smaller reachability for a state by the reverse transformation of diff --git a/src/rose/rose_build_add.cpp b/src/rose/rose_build_add.cpp index dc9ee3088..82f0e2e02 100644 --- a/src/rose/rose_build_add.cpp +++ b/src/rose/rose_build_add.cpp @@ -216,9 +216,9 @@ RoseRoleHistory selectHistory(const RoseBuildImpl &tbi, const RoseBuildData &bd, const bool fixed_offset_src = g[u].fixedOffset(); const bool has_bounds = g[e].minBound || (g[e].maxBound != ROSE_BOUND_INF); - DEBUG_PRINTF("edge %zu->%zu, bounds=[%u,%u], fixed_u=%d, prefix=%d\n", + /*DEBUG_PRINTF("edge %zu->%zu, bounds=[%u,%u], fixed_u=%d, prefix=%d\n", g[u].index, g[v].index, g[e].minBound, g[e].maxBound, - (int)g[u].fixedOffset(), (int)g[v].left); + (int)g[u].fixedOffset(), (int)g[v].left);*/ if (g[v].left) { // Roles with prefix engines have their history handled by that prefix. diff --git a/src/util/arch/ppc64el/simd_utils.h b/src/util/arch/ppc64el/simd_utils.h index ea1766b26..119d0946f 100644 --- a/src/util/arch/ppc64el/simd_utils.h +++ b/src/util/arch/ppc64el/simd_utils.h @@ -152,7 +152,7 @@ static really_inline u32 movemask128(m128 a) { static uint8x16_t perm = { 16, 24, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; uint8x16_t bitmask = vec_gb((uint8x16_t) a); bitmask = (uint8x16_t) vec_perm(vec_splat_u8(0), bitmask, perm); - u32 movemask; + u32 ALIGN_ATTR(16) movemask; vec_ste((uint32x4_t) bitmask, 0, &movemask); return movemask; } @@ -285,27 +285,27 @@ m128 loadbytes128(const void *ptr, unsigned int n) { return a; } -#define CASE_ALIGN_VECTORS(a, b, offset) case offset: return (m128)vec_sld((int8x16_t)(b), (int8x16_t)(a), (16 - offset)); break; +#define CASE_ALIGN_VECTORS(a, b, offset) case offset: return (m128)vec_sld((int8x16_t)(a), (int8x16_t)(b), (16 - offset)); break; static really_really_inline m128 palignr_imm(m128 r, m128 l, int offset) { switch (offset) { case 0: return l; break; - CASE_ALIGN_VECTORS(l, r, 1); - CASE_ALIGN_VECTORS(l, r, 2); - CASE_ALIGN_VECTORS(l, r, 3); - CASE_ALIGN_VECTORS(l, r, 4); - CASE_ALIGN_VECTORS(l, r, 5); - CASE_ALIGN_VECTORS(l, r, 6); - CASE_ALIGN_VECTORS(l, r, 7); - CASE_ALIGN_VECTORS(l, r, 8); - CASE_ALIGN_VECTORS(l, r, 9); - CASE_ALIGN_VECTORS(l, r, 10); - CASE_ALIGN_VECTORS(l, r, 11); - CASE_ALIGN_VECTORS(l, r, 12); - CASE_ALIGN_VECTORS(l, r, 13); - CASE_ALIGN_VECTORS(l, r, 14); - CASE_ALIGN_VECTORS(l, r, 15); + CASE_ALIGN_VECTORS(r, l, 1); + CASE_ALIGN_VECTORS(r, l, 2); + CASE_ALIGN_VECTORS(r, l, 3); + CASE_ALIGN_VECTORS(r, l, 4); + CASE_ALIGN_VECTORS(r, l, 5); + CASE_ALIGN_VECTORS(r, l, 6); + CASE_ALIGN_VECTORS(r, l, 7); + CASE_ALIGN_VECTORS(r, l, 8); + CASE_ALIGN_VECTORS(r, l, 9); + CASE_ALIGN_VECTORS(r, l, 10); + CASE_ALIGN_VECTORS(r, l, 11); + CASE_ALIGN_VECTORS(r, l, 12); + CASE_ALIGN_VECTORS(r, l, 13); + CASE_ALIGN_VECTORS(r, l, 14); + CASE_ALIGN_VECTORS(r, l, 15); case 16: return r; break; default: return zeroes128(); break; } diff --git a/src/util/supervector/arch/ppc64el/impl.cpp b/src/util/supervector/arch/ppc64el/impl.cpp index 2eba69b2d..494bcbd69 100644 --- a/src/util/supervector/arch/ppc64el/impl.cpp +++ b/src/util/supervector/arch/ppc64el/impl.cpp @@ -49,7 +49,7 @@ really_inline SuperVector<16>::SuperVector(SuperVector const &other) template<> template<> -really_inline SuperVector<16>::SuperVector(char __bool __vector v) +really_inline SuperVector<16>::SuperVector(__vector __bool char v) { u.u8x16[0] = (uint8x16_t) v; }; @@ -269,10 +269,10 @@ really_inline SuperVector<16> SuperVector<16>::eq(SuperVector<16> const &b) cons template <> really_inline typename SuperVector<16>::comparemask_type SuperVector<16>::comparemask(void) const { - uint8x16_t bitmask = vec_gb( u.u8x16[0]); static uint8x16_t perm = { 16, 24, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; + uint8x16_t bitmask = vec_gb(u.u8x16[0]); bitmask = (uint8x16_t) vec_perm(vec_splat_u8(0), bitmask, perm); - u32 movemask; + u32 ALIGN_ATTR(16) movemask; vec_ste((uint32x4_t) bitmask, 0, &movemask); return movemask; } diff --git a/src/util/supervector/arch/x86/impl.cpp b/src/util/supervector/arch/x86/impl.cpp index c9daf0cf1..49fbee99d 100644 --- a/src/util/supervector/arch/x86/impl.cpp +++ b/src/util/supervector/arch/x86/impl.cpp @@ -523,9 +523,7 @@ template <> really_inline SuperVector<16> SuperVector<16>::loadu_maskz(void const *ptr, uint8_t const len) { SuperVector mask = Ones_vshr(16 -len); - mask.print8("mask"); SuperVector v = _mm_loadu_si128((const m128 *)ptr); - v.print8("v"); return mask & v; }