Skip to content

Commit

Permalink
check availability of SSE* instructions on target platform
Browse files Browse the repository at this point in the history
Checking for SSE instructions on the build host by reading /proc/cpuinfo
does not work when cross-compiling.
  • Loading branch information
Karry committed May 29, 2022
1 parent f9166e9 commit f4d1c1e
Show file tree
Hide file tree
Showing 5 changed files with 123 additions and 87 deletions.
139 changes: 52 additions & 87 deletions CMakeModules/FindSSE.cmake
Original file line number Diff line number Diff line change
@@ -1,104 +1,69 @@
# Check if SSE instructions are available on the machine where
# the project is compiled.
# Check whether SSE instructions are supported by the compiler and the target platform (cross-compilation aware)
include(CheckCCompilerFlag)

IF(CMAKE_SYSTEM_NAME MATCHES "Linux")
EXEC_PROGRAM(cat ARGS "/proc/cpuinfo" OUTPUT_VARIABLE CPUINFO)
check_c_compiler_flag(-msse2 HAVE_SSE2)
check_c_compiler_flag(-msse3 HAVE_SSE3)
check_c_compiler_flag(-mssse3 HAVE_SSSE3)
check_c_compiler_flag(-msse4.1 HAVE_SSE4_1)

STRING(REGEX REPLACE "^.*(sse2).*$" "\\1" SSE_THERE "${CPUINFO}")
STRING(COMPARE EQUAL "sse2" "${SSE_THERE}" SSE2_TRUE)
IF (SSE2_TRUE)
set(SSE2_FOUND true CACHE BOOL "SSE2 available on host")
ELSE (SSE2_TRUE)
set(SSE2_FOUND false CACHE BOOL "SSE2 available on host")
ENDIF (SSE2_TRUE)
# Some compilers accept SSE flags even when the target platform doesn't support them (e.g. Clang with an ARM target),
# so it is necessary to try compiling actual code.
# Confirm SSE2 by building a real SSE2 program with -msse2.
# HAVE_SSE2 is only re-examined when it is already truthy on entry.
if(HAVE_SSE2)
  try_compile(
    SSE_OK
    "${PROJECT_BINARY_DIR}"
    "${PROJECT_SOURCE_DIR}/CMakeModules/TestSSE2.c"
    COMPILE_DEFINITIONS "-msse2"
  )
  if(NOT SSE_OK)
    # The test program did not build: report it and mark SSE2 as unavailable.
    message(STATUS "SSE2 test compilation fails")
    set(HAVE_SSE2 FALSE)
  endif()
endif()

# /proc/cpuinfo apparently omits sse3 :(
STRING(REGEX REPLACE "^.*[^s](sse3).*$" "\\1" SSE_THERE "${CPUINFO}")
STRING(COMPARE EQUAL "sse3" "${SSE_THERE}" SSE3_TRUE)
IF (NOT SSE3_TRUE)
STRING(REGEX REPLACE "^.*(T2300).*$" "\\1" SSE_THERE "${CPUINFO}")
STRING(COMPARE EQUAL "T2300" "${SSE_THERE}" SSE3_TRUE)
ENDIF (NOT SSE3_TRUE)
# Confirm SSE3 by building the SSE3 test program with -msse3.
# HAVE_SSE3 is only re-examined when it is already truthy on entry.
if(HAVE_SSE3)
  try_compile(
    SSE_OK
    "${PROJECT_BINARY_DIR}"
    "${PROJECT_SOURCE_DIR}/CMakeModules/TestSSE3.c"
    COMPILE_DEFINITIONS "-msse3"
  )
  if(NOT SSE_OK)
    # The test program did not build: report it and mark SSE3 as unavailable.
    message(STATUS "SSE3 test compilation fails")
    set(HAVE_SSE3 FALSE)
  endif()
endif()

STRING(REGEX REPLACE "^.*(ssse3).*$" "\\1" SSE_THERE "${CPUINFO}")
STRING(COMPARE EQUAL "ssse3" "${SSE_THERE}" SSSE3_TRUE)
IF (SSE3_TRUE OR SSSE3_TRUE)
set(SSE3_FOUND true CACHE BOOL "SSE3 available on host")
ELSE (SSE3_TRUE OR SSSE3_TRUE)
set(SSE3_FOUND false CACHE BOOL "SSE3 available on host")
ENDIF (SSE3_TRUE OR SSSE3_TRUE)
IF (SSSE3_TRUE)
set(SSSE3_FOUND true CACHE BOOL "SSSE3 available on host")
ELSE (SSSE3_TRUE)
set(SSSE3_FOUND false CACHE BOOL "SSSE3 available on host")
ENDIF (SSSE3_TRUE)
# Confirm SSSE3 by building the SSSE3 test program with -mssse3.
# HAVE_SSSE3 is only re-examined when it is already truthy on entry.
if(HAVE_SSSE3)
  try_compile(
    SSE_OK
    "${PROJECT_BINARY_DIR}"
    "${PROJECT_SOURCE_DIR}/CMakeModules/TestSSSE3.c"
    COMPILE_DEFINITIONS "-mssse3"
  )
  if(NOT SSE_OK)
    # Name the extension that actually failed (SSSE3, not SSE3) so the
    # configure log is not confused with the separate SSE3 check.
    message(STATUS "SSSE3 test compilation fails")
    set(HAVE_SSSE3 FALSE)
  endif()
endif()

STRING(REGEX REPLACE "^.*(sse4_1).*$" "\\1" SSE_THERE "${CPUINFO}")
STRING(COMPARE EQUAL "sse4_1" "${SSE_THERE}" SSE41_TRUE)
IF (SSE41_TRUE)
set(SSE4_1_FOUND true CACHE BOOL "SSE4.1 available on host")
ELSE (SSE41_TRUE)
set(SSE4_1_FOUND false CACHE BOOL "SSE4.1 available on host")
ENDIF (SSE41_TRUE)
ELSEIF(CMAKE_SYSTEM_NAME MATCHES "Darwin")
EXEC_PROGRAM("/usr/sbin/sysctl -n machdep.cpu.features" OUTPUT_VARIABLE
CPUINFO)
# Confirm SSE4.1 by building the SSE4.1 test program with -msse4.1.
# HAVE_SSE4_1 is only re-examined when it is already truthy on entry.
if(HAVE_SSE4_1)
  try_compile(
    SSE_OK
    "${PROJECT_BINARY_DIR}"
    "${PROJECT_SOURCE_DIR}/CMakeModules/TestSSE41.c"
    COMPILE_DEFINITIONS "-msse4.1"
  )
  if(NOT SSE_OK)
    # The test program did not build: report it and mark SSE4.1 as unavailable.
    message(STATUS "SSE4.1 test compilation fails")
    set(HAVE_SSE4_1 FALSE)
  endif()
endif()

STRING(REGEX REPLACE "^.*[^S](SSE2).*$" "\\1" SSE_THERE "${CPUINFO}")
STRING(COMPARE EQUAL "SSE2" "${SSE_THERE}" SSE2_TRUE)
IF (SSE2_TRUE)
set(SSE2_FOUND true CACHE BOOL "SSE2 available on host")
ELSE (SSE2_TRUE)
set(SSE2_FOUND false CACHE BOOL "SSE2 available on host")
ENDIF (SSE2_TRUE)

STRING(REGEX REPLACE "^.*[^S](SSE3).*$" "\\1" SSE_THERE "${CPUINFO}")
STRING(COMPARE EQUAL "SSE3" "${SSE_THERE}" SSE3_TRUE)
IF (SSE3_TRUE)
set(SSE3_FOUND true CACHE BOOL "SSE3 available on host")
ELSE (SSE3_TRUE)
set(SSE3_FOUND false CACHE BOOL "SSE3 available on host")
ENDIF (SSE3_TRUE)

STRING(REGEX REPLACE "^.*(SSSE3).*$" "\\1" SSE_THERE "${CPUINFO}")
STRING(COMPARE EQUAL "SSSE3" "${SSE_THERE}" SSSE3_TRUE)
IF (SSSE3_TRUE)
set(SSSE3_FOUND true CACHE BOOL "SSSE3 available on host")
ELSE (SSSE3_TRUE)
set(SSSE3_FOUND false CACHE BOOL "SSSE3 available on host")
ENDIF (SSSE3_TRUE)

STRING(REGEX REPLACE "^.*(SSE4.1).*$" "\\1" SSE_THERE "${CPUINFO}")
STRING(COMPARE EQUAL "SSE4.1" "${SSE_THERE}" SSE41_TRUE)
IF (SSE41_TRUE)
set(SSE4_1_FOUND true CACHE BOOL "SSE4.1 available on host")
ELSE (SSE41_TRUE)
set(SSE4_1_FOUND false CACHE BOOL "SSE4.1 available on host")
ENDIF (SSE41_TRUE)
ELSEIF(CMAKE_SYSTEM_NAME MATCHES "Windows")
# TODO
set(SSE2_FOUND true CACHE BOOL "SSE2 available on host")
set(SSE3_FOUND false CACHE BOOL "SSE3 available on host")
set(SSSE3_FOUND false CACHE BOOL "SSSE3 available on host")
set(SSE4_1_FOUND false CACHE BOOL "SSE4.1 available on host")
ELSE(CMAKE_SYSTEM_NAME MATCHES "Linux")
set(SSE2_FOUND true CACHE BOOL "SSE2 available on host")
set(SSE3_FOUND false CACHE BOOL "SSE3 available on host")
set(SSSE3_FOUND false CACHE BOOL "SSSE3 available on host")
set(SSE4_1_FOUND false CACHE BOOL "SSE4.1 available on host")
ENDIF(CMAKE_SYSTEM_NAME MATCHES "Linux")
# Publish the per-extension results as BOOL cache entries.
# The expansions are quoted so that an empty HAVE_* value is still passed as
# an explicit (false) value instead of disappearing from the argument list.
# No FORCE is used, so an entry already present in the cache is kept.
set(SSE2_FOUND "${HAVE_SSE2}" CACHE BOOL "SSE2 available on target")
set(SSE3_FOUND "${HAVE_SSE3}" CACHE BOOL "SSE3 available on target")
set(SSSE3_FOUND "${HAVE_SSSE3}" CACHE BOOL "SSSE3 available on target")
set(SSE4_1_FOUND "${HAVE_SSE4_1}" CACHE BOOL "SSE4.1 available on target")

# Emit a status line for every SSE level whose *_FOUND result is false.
if(NOT SSE2_FOUND)
  message(STATUS "Could not find hardware support for SSE2 on this machine.")
  message(STATUS "SSE2 is not supported on target platform.")
endif()

if(NOT SSE3_FOUND)
  message(STATUS "Could not find hardware support for SSE3 on this machine.")
  message(STATUS "SSE3 is not supported on target platform.")
endif()

if(NOT SSSE3_FOUND)
  message(STATUS "Could not find hardware support for SSSE3 on this machine.")
  message(STATUS "SSSE3 is not supported on target platform.")
endif()

if(NOT SSE4_1_FOUND)
  message(STATUS "Could not find hardware support for SSE4.1 on this machine.")
  message(STATUS "SSE4.1 is not supported on target platform.")
endif()

mark_as_advanced(SSE2_FOUND SSE3_FOUND SSSE3_FOUND SSE4_1_FOUND)
20 changes: 20 additions & 0 deletions CMakeModules/TestSSE2.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
#include <stdio.h>

/* SSE and SSE2 intrinsic headers */
#include <xmmintrin.h>
#include <emmintrin.h>

/* Short alias for the SSE2 packed-double vector type (two doubles). */
typedef __m128d v2df;

/*
 * Compile probe for SSE2: multiplies the vector (2.0, 3.0) by itself with
 * SSE2 intrinsics and prints both lanes of the product.
 */
int main(int argc, char **argv)
{
    const v2df operand = _mm_setr_pd(2.0, 3.0);
    const v2df squared = _mm_mul_pd(operand, operand);
    double lane;

    _mm_storel_pd(&lane, squared); /* lower lane */
    printf("%f\n", lane);

    _mm_storeh_pd(&lane, squared); /* upper lane */
    printf("%f\n", lane);

    return 0;
}
20 changes: 20 additions & 0 deletions CMakeModules/TestSSE3.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
#include <stdio.h>

/* SSE, SSE2 and SSE3 intrinsic headers */
#include <xmmintrin.h>
#include <emmintrin.h>
#include <pmmintrin.h>

/* Short alias for the packed-double vector type (two doubles). */
typedef __m128d v2df;

/*
 * Compile probe for SSE3. The program must use an intrinsic that exists only
 * from SSE3 on; otherwise it would build wherever SSE2 builds and the check
 * would prove nothing. It is meant to be compiled with -msse3.
 */
int main(int argc, char **argv)
{
    v2df calcx = _mm_setr_pd(2.0, 3.0);
    /* _mm_hadd_pd (HADDPD) is an SSE3 instruction declared in <pmmintrin.h>. */
    v2df xx = _mm_hadd_pd(calcx, calcx);
    double d;

    _mm_storel_pd(&d, xx); /* lower lane */
    printf("%f\n", d);
    _mm_storeh_pd(&d, xx); /* upper lane */
    printf("%f\n", d);
    return 0;
}
15 changes: 15 additions & 0 deletions CMakeModules/TestSSE41.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
#include <stdio.h>

/* SSE, SSE2 and SSE4.1 intrinsic headers */
#include <xmmintrin.h>
#include <emmintrin.h>
#include <smmintrin.h>

/*
 * Compile probe for SSE4.1: runs _mm_testz_si128 on two identical integer
 * vectors and prints the returned flag.
 */
int main(int argc, char **argv)
{
    const __m128i lhs = _mm_set_epi32(1, 2, 3, 4);
    const __m128i rhs = _mm_set_epi32(1, 2, 3, 4);
    const int flag = _mm_testz_si128(lhs, rhs);

    printf("%d\n", flag);
    return 0;
}
16 changes: 16 additions & 0 deletions CMakeModules/TestSSSE3.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
#include <stdio.h>

/* SSE, SSE2 and SSSE3 intrinsic headers */
#include <xmmintrin.h>
#include <emmintrin.h>
#include <tmmintrin.h>

/*
 * Compile probe for SSSE3: performs a horizontal add with _mm_hadd_epi32
 * (an SSSE3 intrinsic) and prints the lowest 32-bit lane of the result.
 */
int main(int argc, char **argv)
{
    __m128i calcx = _mm_set_epi32(1, 2, 3, 4);
    __m128i xx = _mm_hadd_epi32(calcx, calcx);
    /*
     * Read the low lane with _mm_cvtsi128_si32 rather than _mm_storeu_si32:
     * the latter is missing from the headers of older compilers, which would
     * make this probe fail for a reason unrelated to SSSE3 support.
     */
    int i = _mm_cvtsi128_si32(xx);

    printf("%d\n", i);
    return 0;
}

0 comments on commit f4d1c1e

Please sign in to comment.