Skip to content

Commit

Permalink
cpu_features: Switch to cpu_features
Browse files Browse the repository at this point in the history
Previously, we used our own CPU feature detection methods. These were not
totally reliable and were difficult to implement across compilers and OSes.
We now switch to `cpu_features` as a subproject and build upon its expertise.

So far this is a work in progress. Known issues and remaining to-dos:
   - code needs clean-up
   - remove unnecessary checks
   - make sure all NEON versions are still detected correctly
   - incomplete `VOLK_CPU` feature checks:
	- `has_64`
	- `has_3dnow`
	- `has_abm`
	- `has_fma` now includes `fma3` and `fma4`.
   - Remove the xgetbv checks; we don't handle this anymore.
  • Loading branch information
jdemel committed Oct 18, 2020
1 parent e3349ee commit 5e2193c
Show file tree
Hide file tree
Showing 6 changed files with 67 additions and 239 deletions.
3 changes: 3 additions & 0 deletions .gitmodules
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
[submodule "cpu_features"]
path = cpu_features
url = https://github.com/google/cpu_features.git
3 changes: 3 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,9 @@ endif(MSVC)
# Dependencies setup
########################################################################

# cpu_features
add_subdirectory(cpu_features)

# Python
include(VolkPython) #sets PYTHON_EXECUTABLE and PYTHON_DASH_B
VOLK_PYTHON_CHECK_MODULE("python >= 3.4" sys "sys.version.split()[0] >= '3.4'" PYTHON_MIN_VER_FOUND)
Expand Down
1 change: 1 addition & 0 deletions cpu_features
Submodule cpu_features added at 339bfd
95 changes: 34 additions & 61 deletions gen/archs.xml
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
<flag compiler="gnu">-funsafe-math-optimizations</flag>
<flag compiler="clang">-funsafe-math-optimizations</flag>
<alignment>16</alignment>
<check name="has_neon"></check>
<check name="neon"></check>
</arch>

<arch name="neonv7">
Expand All @@ -27,14 +27,14 @@
<flag compiler="clang">-mfpu=neon</flag>
<flag compiler="clang">-funsafe-math-optimizations</flag>
<alignment>16</alignment>
<check name="has_neonv7"></check>
<check name="neon"></check>
</arch>

<arch name="neonv8">
<flag compiler="gnu">-funsafe-math-optimizations</flag>
<flag compiler="clang">-funsafe-math-optimizations</flag>
<alignment>16</alignment>
<check name="has_neonv8"></check>
<check name="neon"></check>
</arch>

<arch name="32">
Expand All @@ -43,42 +43,42 @@
</arch>

<arch name="64">
<check name="check_extended_cpuid">
<param>0x80000001</param>
</check>
<check name="cpuid_x86_bit"> <!-- checks to see if a bit is set -->
<param>3</param> <!-- eax, ebx, ecx, [edx] -->
<param>0x80000001</param> <!-- cpuid operation -->
<param>29</param> <!-- bit shift -->
</check>
<!-- <check name="check_extended_cpuid"> -->
<!-- <param>0x80000001</param>
</check> -->
<!--<check name="cpuid_x86_bit"> checks to see if a bit is set -->
<!-- <param>3</param> eax, ebx, ecx, [edx] -->
<!-- <param>0x80000001</param> cpuid operation -->
<!-- <param>29</param> bit shift -->
<!-- </check> -->
<flag compiler="gnu">-m64</flag>
<flag compiler="clang">-m64</flag>
</arch>

<arch name="3dnow">
<check name="cpuid_x86_bit">
<!-- <check name="cpuid_x86_bit">
<param>3</param>
<param>0x80000001</param>
<param>31</param>
</check>
</check> -->
<flag compiler="gnu">-m3dnow</flag>
<flag compiler="clang">-m3dnow</flag>
<alignment>8</alignment>
</arch>

<arch name="abm">
<check name="cpuid_x86_bit">
<!-- <check name="cpuid_x86_bit">
<param>3</param>
<param>0x80000001</param>
<param>5</param>
</check>
</check> -->
<flag compiler="gnu">-msse4.2</flag>
<flag compiler="clang">-msse4.2</flag>
<alignment>16</alignment>
</arch>

<arch name="popcount">
<check name="cpuid_x86_bit">
<check name="popcnt">
<param>2</param>
<param>0x00000001</param>
<param>23</param>
Expand All @@ -89,7 +89,7 @@
</arch>

<arch name="mmx">
<check name="cpuid_x86_bit">
<check name="mmx">
<param>3</param>
<param>0x00000001</param>
<param>23</param>
Expand All @@ -101,7 +101,12 @@
</arch>

<arch name="fma">
<check name="cpuid_x86_bit">
<check name="fma3">
<param>2</param>
<param>0x00000001</param>
<param>12</param>
</check>
<check name="fma4">
<param>2</param>
<param>0x00000001</param>
<param>12</param>
Expand All @@ -113,7 +118,7 @@
</arch>

<arch name="sse">
<check name="cpuid_x86_bit">
<check name="sse">
<param>3</param>
<param>0x00000001</param>
<param>25</param>
Expand All @@ -127,7 +132,7 @@
</arch>

<arch name="sse2">
<check name="cpuid_x86_bit">
<check name="sse2">
<param>3</param>
<param>0x00000001</param>
<param>26</param>
Expand All @@ -146,7 +151,7 @@
</arch>

<arch name="sse3">
<check name="cpuid_x86_bit">
<check name="sse3">
<param>2</param>
<param>0x00000001</param>
<param>0</param>
Expand All @@ -160,7 +165,7 @@
</arch>

<arch name="ssse3">
<check name="cpuid_x86_bit">
<check name="ssse3">
<param>2</param>
<param>0x00000001</param>
<param>9</param>
Expand All @@ -172,7 +177,7 @@
</arch>

<arch name="sse4_a">
<check name="cpuid_x86_bit">
<check name="sse4a">
<param>2</param>
<param>0x80000001</param>
<param>6</param>
Expand All @@ -183,7 +188,7 @@
</arch>

<arch name="sse4_1">
<check name="cpuid_x86_bit">
<check name="sse4_1">
<param>2</param>
<param>0x00000001</param>
<param>19</param>
Expand All @@ -195,7 +200,7 @@
</arch>

<arch name="sse4_2">
<check name="cpuid_x86_bit">
<check name="sse4_2">
<param>2</param>
<param>0x00000001</param>
<param>20</param>
Expand All @@ -207,40 +212,24 @@
</arch>

<arch name="avx">
<check name="cpuid_x86_bit">
<check name="avx">
<param>2</param>
<param>0x00000001</param>
<param>28</param>
</check>
<!-- check to make sure that xgetbv is enabled in OS -->
<check name="cpuid_x86_bit">
<param>2</param>
<param>0x00000001</param>
<param>27</param>
</check>
<!-- check to see that the OS has enabled AVX -->
<check name="get_avx_enabled"></check>
<flag compiler="gnu">-mavx</flag>
<flag compiler="clang">-mavx</flag>
<flag compiler="msvc">/arch:AVX</flag>
<alignment>32</alignment>
</arch>

<arch name="avx2">
<check name="cpuid_count_x86_bit">
<check name="avx2">
<param>7</param>
<param>0</param>
<param>1</param>
<param>5</param>
</check>
<!-- check to make sure that xgetbv is enabled in OS -->
<check name="cpuid_x86_bit">
<param>2</param>
<param>0x00000001</param>
<param>27</param>
</check>
<!-- check to see that the OS has enabled AVX2 -->
<check name="get_avx2_enabled"></check>
<flag compiler="gnu">-mavx2</flag>
<flag compiler="clang">-mavx2</flag>
<flag compiler="msvc">/arch:AVX2</flag>
Expand All @@ -249,20 +238,12 @@

<arch name="avx512f">
<!-- check for AVX512F -->
<check name="cpuid_count_x86_bit">
<check name="avx512f">
<param>7</param>
<param>0</param>
<param>1</param>
<param>16</param>
</check>
<!-- check to make sure that xgetbv is enabled in OS -->
<check name="cpuid_x86_bit">
<param>2</param>
<param>0x00000001</param>
<param>27</param>
</check>
<!-- check to see that the OS has enabled AVX512 -->
<check name="get_avx512_enabled"></check>
<flag compiler="gnu">-mavx512f</flag>
<flag compiler="clang">-mavx512f</flag>
<flag compiler="msvc">/arch:AVX512F</flag>
Expand All @@ -271,20 +252,12 @@

<arch name="avx512cd">
<!-- check for AVX512CD -->
<check name="cpuid_count_x86_bit">
<check name="avx512cd">
<param>7</param>
<param>0</param>
<param>1</param>
<param>28</param>
</check>
<!-- check to make sure that xgetbv is enabled in OS -->
<check name="cpuid_x86_bit">
<param>2</param>
<param>0x00000001</param>
<param>27</param>
</check>
<!-- check to see that the OS has enabled AVX512 -->
<check name="get_avx512_enabled"></check>
<flag compiler="gnu">-mavx512cd</flag>
<flag compiler="clang">-mavx512cd</flag>
<flag compiler="msvc">/arch:AVX512CD</flag>
Expand Down
5 changes: 4 additions & 1 deletion lib/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -542,7 +542,7 @@ endif()
#include directories is taken as provided; it -might- matter, but
#probably doesn't.
add_library(volk SHARED $<TARGET_OBJECTS:volk_obj>)
target_link_libraries(volk PUBLIC ${volk_libraries})
target_link_libraries(volk PUBLIC ${volk_libraries} cpu_features)
target_include_directories(volk
PUBLIC $<BUILD_INTERFACE:${PROJECT_BINARY_DIR}/include>
PUBLIC $<BUILD_INTERFACE:${PROJECT_SOURCE_DIR}/include>
Expand All @@ -552,6 +552,9 @@ target_include_directories(volk
PUBLIC $<INSTALL_INTERFACE:include>
)

include_directories(../cpu_features/include)


#Configure target properties
if(ORC_FOUND)
target_link_libraries(volk PRIVATE ${ORC_LIBRARIES})
Expand Down
Loading

0 comments on commit 5e2193c

Please sign in to comment.