Skip to content

Commit

Permalink
Fix #1381, improve cmake simd detection
Browse files Browse the repository at this point in the history
Add cmake option `AX_ISA_LEVEL`

Note: the default AX_ISA_LEVEL is 2 (SSE4.1) so that axmol apps can run on a wide range of devices.
If you want an axmol app to run on older devices, specify `-DAX_ISA_LEVEL=1` on the cmake command line, or
`-DAX_ISA_LEVEL=0` to disable SIMD acceleration for the third-party astcenc and webp libraries;
otherwise, the instructions generated by the host compiler will crash on old devices that do not support
the higher-level SIMD instructions.
  • Loading branch information
halx99 committed Oct 8, 2023
1 parent 9252f1e commit fcd3312
Show file tree
Hide file tree
Showing 4 changed files with 123 additions and 148 deletions.
1 change: 1 addition & 0 deletions CMakeOptions.md
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
- win32: whether use ANGLE GLES backend
- osx: whether use OpenGL instead Metal backend
- ios/tvos: whether use GLES instead Metal backend
- AX_ISA_LEVEL: specify the SIMD Instructions Acceleration Level: 0~4, 0: disabled, 1: SSE2, 2: SSE4.1/NEON, 3: SSE4.2, 4: AVX2, default: 2
- AX_GLES_PROFILE: specify the GLES profile version for the GLES backend, valid values: `200`, `300`
- AX_WASM_THREADS: specify wasm thread count, valid value: number: `>=0` , string: must be: `auto` or `navigator.hardwareConcurrency`(default),
- number: explicit set thread count, `0` means disable wasm thread support
Expand Down
182 changes: 98 additions & 84 deletions thirdparty/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -39,99 +39,113 @@ include(CheckCCompilerFlag)
include(CheckCSourceCompiles)
include(CheckCXXSourceCompiles)

# Detection of SIMD intrinsics

### check -msse2 flag
# Probe whether the C compiler accepts its SSE2 switch. The current
# CMAKE_REQUIRED_FLAGS value is saved in OLD_REQUIRED_FLAGS first, then a
# warnings-as-errors flag (/WX on MSVC, -Werror elsewhere) is appended for the
# probe.
set(OLD_REQUIRED_FLAGS ${CMAKE_REQUIRED_FLAGS})
if(MSVC)
set(CMAKE_REQUIRED_FLAGS "${OLD_REQUIRED_FLAGS} /WX")
check_c_compiler_flag("/arch:SSE2" AX_HAVE_SSE2_SWITCH)
else()
set(CMAKE_REQUIRED_FLAGS "${OLD_REQUIRED_FLAGS} -Werror")
check_c_compiler_flag(-msse2 AX_HAVE_SSE2_SWITCH)
endif()
# An accepted switch is taken to mean SSE2 intrinsics are available.
if (AX_HAVE_SSE2_SWITCH)
set(AX_HAVE_SSE2_INTRINSICS 1)
endif()
### end check -msse2 flag

if (NOT TVOS)
# Checking intel SIMD Intrinsics
if(APPLE)
set(CMAKE_REQUIRED_FLAGS "${CMAKE_REQUIRED_FLAGS} -mpopcnt")
# AX_ISA_LEVEL selects the highest SIMD instruction set that may be used:
#   0 = disabled, 1 = SSE2, 2 = SSE4.1/NEON, 3 = SSE4.2, 4 = AVX2.
# The default is 2 (SSE4.1) so that axmol apps can run on a wide range of
# devices. To support older devices, pass `-DAX_ISA_LEVEL=1` on the cmake
# command line; otherwise the instructions generated by the host compiler will
# crash on old devices that do not support the higher-level SIMD instructions.
set(AX_ISA_LEVEL 2 CACHE STRING "SIMD Instructions Acceleration Level")
# Offer the documented levels as a selection list in cmake-gui / ccmake.
set_property(CACHE AX_ISA_LEVEL PROPERTY STRINGS 0 1 2 3 4)
# Only warn on values outside the documented range, so command lines that
# configured before this check was added still configure.
if(NOT "${AX_ISA_LEVEL}" MATCHES "^[0-4]$")
  message(WARNING "AX_ISA_LEVEL='${AX_ISA_LEVEL}' is outside the documented range 0-4")
endif()

# Detect SIMD intrinsics when AX_ISA_LEVEL is not 0
if(AX_ISA_LEVEL)
### check -msse2 flag
set(OLD_REQUIRED_FLAGS ${CMAKE_REQUIRED_FLAGS})
if(MSVC)
set(CMAKE_REQUIRED_FLAGS "${OLD_REQUIRED_FLAGS} /WX")
check_c_compiler_flag("/arch:SSE2" AX_HAVE_SSE2_SWITCH)
else()
set(CMAKE_REQUIRED_FLAGS "${OLD_REQUIRED_FLAGS} -Werror")
check_c_compiler_flag(-msse2 AX_HAVE_SSE2_SWITCH)
endif()
check_c_source_compiles("#include <immintrin.h>
#include <stdint.h>
int main()
{
__m256 m = _mm256_set_ps(1.f, 2.f, 3.f, 4.f, 5.f, 6.f, 7.f, 8.f);
return (int)*(float*)&m;
}" AX_HAVE_AVX2_INTRINSICS)
check_c_source_compiles("#include <nmmintrin.h>
#include <stdint.h>
int main()
{
uint32_t v = 0;
return (int)_mm_popcnt_u32(v);
}" AX_HAVE_SSE42_INTRINSICS)
check_c_source_compiles("#include <smmintrin.h>
#include <stdint.h>
int main()
{
__m128i shuf = _mm_set_epi8(0,0,0,0, 0,0,0,0, 0,0,0,0, 12,8,4,0);
return *(int*)&shuf;
}" AX_HAVE_SSE41_INTRINSICS)

if (NOT AX_HAVE_SSE2_INTRINSICS)
check_c_source_compiles("#include <emmintrin.h>
#include <stdint.h>
if (AX_HAVE_SSE2_SWITCH)
set(AX_HAVE_SSE2_INTRINSICS 1)
endif()
### end check -msse2 flag

if (NOT TVOS)
# ax_check_c_source(<source> <outputVar>)
#
# Probe a C snippet and record the result under <outputVar>.
#   source    - complete C program text, passed as one quoted argument.
#   outputVar - name of the result variable handed to the underlying check.
#
# Native builds use check_c_source_runs(), so the snippet is executed on the
# build host as well as compiled. Cross builds fall back to
# check_c_source_compiles(), which only compiles and links the snippet.
macro(ax_check_c_source source outputVar)
  if (NOT CMAKE_CROSSCOMPILING)
    # Load the module here so the macro does not rely on a later include().
    include(CheckCSourceRuns)
    check_c_source_runs("${source}" ${outputVar})
  else()
    include(CheckCSourceCompiles)
    check_c_source_compiles("${source}" ${outputVar})
  endif()
endmacro()

# Checking intel SIMD Intrinsics
include(CheckCSourceRuns)
if(APPLE)
set(CMAKE_REQUIRED_FLAGS "${CMAKE_REQUIRED_FLAGS} -mpopcnt")
endif()
ax_check_c_source("#include <immintrin.h>
int main()
{
__m256 m = _mm256_set_ps(0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f);
return (int)*(float*)&m;
}" AX_HAVE_AVX2_INTRINSICS)
ax_check_c_source("#include <nmmintrin.h>
int main()
{
unsigned int v = 0;
return (int)_mm_popcnt_u32(v);
}" AX_HAVE_SSE42_INTRINSICS)
ax_check_c_source("#include <smmintrin.h>
int main()
{
__m128d m = _mm_set_sd(0.0);
return (int)*(double*)&m;
}" AX_HAVE_SSE2_INTRINSICS)
__m128i shuf = _mm_set_epi8(0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0);
return *(int*)&shuf;
}" AX_HAVE_SSE41_INTRINSICS)

if (NOT AX_HAVE_SSE2_INTRINSICS)
ax_check_c_source("#include <emmintrin.h>
int main()
{
__m128d m = _mm_set_sd(0.0);
return (int)*(double*)&m;
}" AX_HAVE_SSE2_INTRINSICS)
endif()

set(CMAKE_REQUIRED_FLAGS ${OLD_REQUIRED_FLAGS})

### Checking ARM SIMD neon
if (NOT WASM) # wasm neon stupid not work, so skipped
check_include_file(arm_neon.h AX_HAVE_ARM_NEON_H)
if(AX_HAVE_ARM_NEON_H)
check_c_source_compiles("#include <arm_neon.h>
int main()
{
int32x4_t ret4 = vdupq_n_s32(0);
return vgetq_lane_s32(ret4, 0);
}" AX_HAVE_NEON_INTRINSICS)
endif()
endif()
else()
message(AUTHOR_WARNING "Skipping AVX2/SSE4/NEON detection for astc-encoder when build target 'tvos'")
endif()

set(CMAKE_REQUIRED_FLAGS ${OLD_REQUIRED_FLAGS})

### Checking ARM SIMD neon
check_include_file(arm_neon.h AX_HAVE_ARM_NEON_H)
if(AX_HAVE_ARM_NEON_H)
check_c_source_compiles("#include <arm_neon.h>
int main()
{
int32x4_t ret4 = vdupq_n_s32(0);
return vgetq_lane_s32(ret4, 0);
}" AX_HAVE_NEON_INTRINSICS)
unset(OLD_REQUIRED_FLAGS)

### set AX_ISA_SIMD
# Pick the best detected instruction set that AX_ISA_LEVEL permits:
#   level >= 4: avx2, >= 3: sse4.2, >= 2: sse4.1, any non-zero level: sse2,
#   then neon.
# Every outcome, including the "null" fallback, is written to the cache with
# FORCE, so a value cached by an earlier configure is not left behind when this
# block runs again with a different result.
if(AX_HAVE_AVX2_INTRINSICS AND AX_ISA_LEVEL GREATER_EQUAL 4)
  set(AX_ISA_SIMD "avx2" CACHE STRING "" FORCE)
elseif(AX_HAVE_SSE42_INTRINSICS AND AX_ISA_LEVEL GREATER_EQUAL 3)
  set(AX_ISA_SIMD "sse4.2" CACHE STRING "" FORCE)
elseif(AX_HAVE_SSE41_INTRINSICS AND AX_ISA_LEVEL GREATER_EQUAL 2)
  set(AX_ISA_SIMD "sse4.1" CACHE STRING "" FORCE)
elseif(AX_HAVE_SSE2_INTRINSICS AND AX_ISA_LEVEL)
  set(AX_ISA_SIMD "sse2" CACHE STRING "" FORCE)
elseif(AX_HAVE_NEON_INTRINSICS AND AX_ISA_LEVEL)
  set(AX_ISA_SIMD "neon" CACHE STRING "" FORCE)
else()
  # Cache the fallback too; a plain set() would leave the old cache entry
  # untouched.
  set(AX_ISA_SIMD "null" CACHE STRING "" FORCE)
endif()
else()
message(AUTHOR_WARNING "Skipping AVX2/SSE4/NEON detection for astc-encoder when build target 'tvos'")
endif()

set(CMAKE_REQUIRED_FLAGS ${OLD_REQUIRED_FLAGS})
unset(OLD_REQUIRED_FLAGS)

### set AX_ISA_SIMD
# Select the first detected instruction set, testing the AX_HAVE_*_INTRINSICS
# flags in the order AVX2, SSE4.2, SSE4.1, SSE2, NEON. AX_ISA_SIMD is set to
# "none" when none of the flags is set.
if(AX_HAVE_AVX2_INTRINSICS)
set(AX_ISA_SIMD "avx2")
elseif(AX_HAVE_SSE42_INTRINSICS)
set(AX_ISA_SIMD "sse4.2")
elseif(AX_HAVE_SSE41_INTRINSICS)
set(AX_ISA_SIMD "sse4.1")
elseif(AX_HAVE_SSE2_INTRINSICS)
set(AX_ISA_SIMD "sse2")
elseif(AX_HAVE_NEON_INTRINSICS)
set(AX_ISA_SIMD "neon")
else()
set(AX_ISA_SIMD "none")
endif()

message(AUTHOR_WARNING "AX_ISA_SIMD=${AX_ISA_SIMD},AX_HAVE_AVX2_INTRINSICS=${AX_HAVE_AVX2_INTRINSICS},AX_HAVE_SSE42_INTRINSICS=${AX_HAVE_SSE42_INTRINSICS},AX_HAVE_SSE41_INTRINSICS=${AX_HAVE_SSE41_INTRINSICS},AX_HAVE_SSE2_INTRINSICS=${AX_HAVE_SSE2_INTRINSICS},AX_HAVE_NEON_INTRINSICS=${AX_HAVE_NEON_INTRINSICS}")
message(AUTHOR_WARNING "AX_ISA_SIMD=${AX_ISA_SIMD}")

if (WINDOWS)
if(CMAKE_CXX_COMPILER_ID MATCHES "Clang")
if (AX_HAVE_SSE41_INTRINSICS)
add_compile_options("-msse4.1")
if (WINDOWS)
if(CMAKE_CXX_COMPILER_ID MATCHES "Clang")
if (AX_HAVE_SSE41_INTRINSICS AND AX_ISA_LEVEL GREATER_EQUAL 2)
add_compile_options("-msse4.1")
endif()
endif()
endif()
endif()
Expand Down
76 changes: 21 additions & 55 deletions thirdparty/astcenc/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -42,71 +42,29 @@ add_library(${target_name} STATIC

# target_compile_definitions(${target_name}
# PUBLIC ASTCENC_DECOMPRESS_ONLY)
if (NOT DEFINED ASTC_ISA_SIMD)
if (NOT (ARCH_ALIAS STREQUAL "x86") AND NOT TVOS)
set(ASTC_HAVE_AVX2_INTRINSICS ${AX_HAVE_AVX2_INTRINSICS})
set(ASTC_HAVE_SSE42_INTRINSICS ${AX_HAVE_SSE42_INTRINSICS})
set(ASTC_HAVE_SSE41_INTRINSICS ${AX_HAVE_SSE41_INTRINSICS})
if (AX_HAVE_NEON_INTRINSICS)
set(OLD_REQUIRED_FLAGS ${CMAKE_REQUIRED_FLAGS})
set(CMAKE_REQUIRED_FLAGS "${CMAKE_REQUIRED_FLAGS} -std=c++11")
check_cxx_source_compiles("#include <arm_neon.h>
int main()
{
int32x4_t ret4 = vdupq_n_s32(0);
uint32x4_t v{};
float16x4_t f16 = vcvt_f16_f32(v);
return vgetq_lane_s32(ret4, 0);
}" ASTC_HAVE_NEON_INTRINSICS)
set(CMAKE_REQUIRED_FLAGS ${OLD_REQUIRED_FLAGS})
unset(OLD_REQUIRED_FLAGS)
endif()
else()
set(ASTC_HAVE_SSE2_INTRINSICS ${AX_HAVE_SSE2_INTRINSICS})
message(AUTHOR_WARNING "Skipping AVX2/SSE4/NEON detection for astc-encoder when build target 'x86' and 'tvos'")
endif()

### set ASTC_ISA_SIMD
if(ASTC_HAVE_AVX2_INTRINSICS)
set(ASTC_ISA_SIMD "avx2")
elseif(ASTC_HAVE_SSE42_INTRINSICS)
set(ASTC_ISA_SIMD "sse4.2")
elseif(ASTC_HAVE_SSE41_INTRINSICS)
set(ASTC_ISA_SIMD "sse4.1")
elseif(ASTC_HAVE_SSE2_INTRINSICS)
if (NOT (ARCH_ALIAS STREQUAL "x86") AND NOT TVOS AND NOT (ARCH_ALIAS MATCHES "arm.*v7"))
set(ASTC_ISA_SIMD ${AX_ISA_SIMD})
else() # astcenc not support sse4/avx in x64, not support neon in arm64
message(AUTHOR_WARNING "Skipping AVX2/SSE4/NEON detection for astc-encoder when build tvos/x86/armv7")
if(AX_HAVE_SSE2_INTRINSICS)
set(ASTC_ISA_SIMD "sse2")
elseif(ASTC_HAVE_NEON_INTRINSICS)
set(ASTC_ISA_SIMD "neon")
else()
set(ASTC_ISA_SIMD "none")
set(ASTC_ISA_SIMD "null")
endif()

# disable simd when wasm
if(WASM)
set(ASTC_ISA_SIMD "none")
endif()

message(AUTHOR_WARNING "ASTC_ISA_SIMD=${ASTC_ISA_SIMD}")
endif()

message(AUTHOR_WARNING "ASTC_ISA_SIMD=${ASTC_ISA_SIMD}")

# Set up configuration for SIMD ISA builds
if(${ASTC_ISA_SIMD} MATCHES "none")
target_compile_definitions(${target_name}
PUBLIC
ASTCENC_NEON=0
ASTCENC_SSE=0
ASTCENC_AVX=0
ASTCENC_POPCNT=0
ASTCENC_F16C=0)
elseif(${ASTC_ISA_SIMD} MATCHES "neon")
if(ASTC_ISA_SIMD STREQUAL "neon")
target_compile_definitions(${target_name}
PUBLIC
ASTCENC_NEON=1
ASTCENC_SSE=0
ASTCENC_AVX=0
ASTCENC_POPCNT=0
ASTCENC_F16C=0)
elseif(${ASTC_ISA_SIMD} MATCHES "avx2")
elseif(ASTC_ISA_SIMD STREQUAL "avx2")
target_compile_definitions(${target_name}
PUBLIC
ASTCENC_NEON=0
Expand All @@ -119,7 +77,7 @@ elseif(${ASTC_ISA_SIMD} MATCHES "avx2")
PRIVATE
$<$<NOT:$<CXX_COMPILER_ID:MSVC>>:-mavx2 -mpopcnt -mf16c>
$<$<CXX_COMPILER_ID:MSVC>:/arch:AVX2>)
elseif(${ASTC_ISA_SIMD} MATCHES "sse4.2")
elseif(ASTC_ISA_SIMD STREQUAL "sse4.2")
target_compile_definitions(${target_name}
PUBLIC
ASTCENC_NEON=0
Expand All @@ -131,7 +89,7 @@ elseif(${ASTC_ISA_SIMD} MATCHES "sse4.2")
target_compile_options(${target_name}
PRIVATE
$<$<NOT:$<CXX_COMPILER_ID:MSVC>>:-msse4.2 -mpopcnt>)
elseif(${ASTC_ISA_SIMD} MATCHES "sse4.1")
elseif(ASTC_ISA_SIMD STREQUAL "sse4.1")
target_compile_definitions(${target_name}
PUBLIC
ASTCENC_NEON=0
Expand All @@ -143,14 +101,22 @@ elseif(${ASTC_ISA_SIMD} MATCHES "sse4.1")
target_compile_options(${target_name}
PRIVATE
$<$<NOT:$<CXX_COMPILER_ID:MSVC>>:-msse4.1 -mpopcnt>)
elseif(${ASTC_ISA_SIMD} MATCHES "sse2")
elseif(ASTC_ISA_SIMD STREQUAL "sse2")
target_compile_definitions(${target_name}
PUBLIC
ASTCENC_NEON=0
ASTCENC_SSE=20
ASTCENC_AVX=0
ASTCENC_POPCNT=0
ASTCENC_F16C=0)
else() # null
target_compile_definitions(${target_name}
PUBLIC
ASTCENC_NEON=0
ASTCENC_SSE=0
ASTCENC_AVX=0
ASTCENC_POPCNT=0
ASTCENC_F16C=0)
endif()

target_include_directories(${target_name} PUBLIC ..)
12 changes: 3 additions & 9 deletions thirdparty/webp/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -15,17 +15,11 @@ target_include_directories(${target_name} PRIVATE "${CMAKE_CURRENT_LIST_DIR}")
target_include_directories(${target_name} PUBLIC "${CMAKE_CURRENT_LIST_DIR}/src/webp")
target_compile_definitions(${target_name} PRIVATE HAVE_CONFIG_H=1)

if (NOT WASM)
set(_WEBP_ISA_SIMD ${AX_ISA_SIMD})
else()
set(_WEBP_ISA_SIMD "none")
endif()

if (_WEBP_ISA_SIMD MATCHES "neon")
if (AX_ISA_SIMD MATCHES "neon")
target_compile_definitions(${target_name} PRIVATE WEBP_HAVE_NEON=1)
elseif(_WEBP_ISA_SIMD MATCHES "avx2" OR _WEBP_ISA_SIMD MATCHES "sse4.2" OR _WEBP_ISA_SIMD MATCHES "sse4.1")
elseif(AX_ISA_SIMD MATCHES "avx2" OR AX_ISA_SIMD MATCHES "sse4")
target_compile_definitions(${target_name} PRIVATE WEBP_HAVE_SSE2=1)
target_compile_definitions(${target_name} PRIVATE WEBP_HAVE_SSE41=1)
elseif(_WEBP_ISA_SIMD MATCHES "sse2")
elseif(AX_ISA_SIMD MATCHES "sse2")
target_compile_definitions(${target_name} PRIVATE WEBP_HAVE_SSE2=1)
endif()

0 comments on commit fcd3312

Please sign in to comment.