Skip to content

Commit 895e9e5

Browse files
initial Neo enabling on architectures other than x86
Related-To: NEO-6011 Signed-off-by: Artur Harasimiuk <artur.harasimiuk@intel.com>
1 parent f958b05 commit 895e9e5

File tree

28 files changed

+8801
-100
lines changed

28 files changed

+8801
-100
lines changed

CMakeLists.txt

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -149,6 +149,20 @@ else()
149149
set(NEO_ARCH "x86")
150150
endif()
151151

152+
if("${CMAKE_SYSTEM_PROCESSOR}" STREQUAL "x86_64") # map CMAKE_SYSTEM_PROCESSOR to NEO_TARGET_PROCESSOR; quoted so an empty value cannot break the if() argument list
153+
set(NEO_TARGET_PROCESSOR "x86_64")
154+
elseif("${CMAKE_SYSTEM_PROCESSOR}" STREQUAL "AMD64") # alternate spelling, mapped to the same x86_64 target
155+
set(NEO_TARGET_PROCESSOR "x86_64")
156+
elseif("${CMAKE_SYSTEM_PROCESSOR}" STREQUAL "aarch64")
157+
set(NEO_TARGET_PROCESSOR "aarch64")
158+
include_directories(${CMAKE_CURRENT_SOURCE_DIR}/third_party/sse2neon) # directory-scoped: adds the sse2neon include path only on aarch64
159+
endif()
160+
message(STATUS "Target processor: ${NEO_TARGET_PROCESSOR}") # prints an empty value when no branch above matched
161+
162+
if(NOT DEFINED NEO_TARGET_PROCESSOR) # no branch above matched: stop configuration with an explicit diagnostic
163+
message(FATAL_ERROR "Unsupported target processor: ${CMAKE_SYSTEM_PROCESSOR}")
164+
endif()
165+
152166
if(NOT DEFINED BUILD_WITH_L0)
153167
if("${NEO_BITS}" STREQUAL "64")
154168
set(BUILD_WITH_L0 TRUE)
@@ -847,6 +861,8 @@ else()
847861
else()
848862
message(WARNING "Spectre mitigation DISABLED")
849863
endif()
864+
check_cxx_compiler_flag(-msse4.2 COMPILER_SUPPORTS_SSE42)
865+
check_cxx_compiler_flag(-mavx2 COMPILER_SUPPORTS_AVX2)
850866
endif()
851867

852868
if(NOT MSVC)

level_zero/CMakeLists.txt

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -485,7 +485,9 @@ if(BUILD_WITH_L0)
485485
)
486486

487487
if(UNIX)
488-
target_link_libraries(${TARGET_NAME_L0} ${GMM_LINK_NAME})
488+
if("${NEO_TARGET_PROCESSOR}" STREQUAL "x86_64") # link GMM only for x86_64; quoted so an unset value cannot break the if() argument list
489+
target_link_libraries(${TARGET_NAME_L0} ${GMM_LINK_NAME})
490+
endif()
489491

490492
set_property(TARGET ${TARGET_NAME_L0}
491493
APPEND_STRING PROPERTY LINK_FLAGS " -Wl,--version-script=${CMAKE_CURRENT_SOURCE_DIR}/core/source/dll/linux/ze.exports"

manifests/manifest.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@ components:
2020
infra:
2121
branch: master
2222
dest_dir: infra
23-
revision: a6b4272e6e2ebd1965b656a0d247038a1111cc58
23+
revision: 6f8216baa8dbd1c185c7dcd5349a8aa7ae0e5591
2424
type: git
2525
internal:
2626
branch: master

opencl/source/CMakeLists.txt

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -189,7 +189,9 @@ if(${GENERATE_EXECUTABLE})
189189
${NEO_SHARED_DIRECTORY}/os_interface/windows/gmm_interface_win.cpp
190190
)
191191
else()
192-
target_link_libraries(${NEO_DYNAMIC_LIB_NAME} ${GMM_LINK_NAME})
192+
if("${NEO_TARGET_PROCESSOR}" STREQUAL "x86_64") # link GMM only for x86_64; quoted so an unset value cannot break the if() argument list
193+
target_link_libraries(${NEO_DYNAMIC_LIB_NAME} ${GMM_LINK_NAME})
194+
endif()
193195
target_include_directories(${NEO_DYNAMIC_LIB_NAME} PRIVATE
194196
${NEO_SHARED_DIRECTORY}/dll/devices${BRANCH_DIR_SUFFIX}
195197
)

opencl/test/unit_test/CMakeLists.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -111,7 +111,7 @@ if(USE_ASAN)
111111
set(GTEST_ENV "LSAN_OPTIONS=suppressions=${CMAKE_CURRENT_SOURCE_DIR}/lsan_suppressions.txt")
112112
endif()
113113

114-
if(NOT MSVC)
114+
if(COMPILER_SUPPORTS_SSE42)
115115
set_source_files_properties(helpers/uint16_sse4_tests.cpp PROPERTIES COMPILE_FLAGS -msse4.2)
116116
endif()
117117

opencl/test/unit_test/helpers/CMakeLists.txt

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -37,14 +37,19 @@ set(IGDRCL_SRCS_tests_helpers
3737
${CMAKE_CURRENT_SOURCE_DIR}/timestamp_packet_tests.h
3838
${CMAKE_CURRENT_SOURCE_DIR}/transfer_properties_tests.cpp
3939
${CMAKE_CURRENT_SOURCE_DIR}/ult_limits.h
40-
${CMAKE_CURRENT_SOURCE_DIR}/uint16_sse4_tests.cpp
4140
${CMAKE_CURRENT_SOURCE_DIR}/validator_tests.cpp
4241
${NEO_SHARED_TEST_DIRECTORY}/common/helpers/aligned_memory_tests.cpp
4342
${NEO_SHARED_TEST_DIRECTORY}/common/helpers/debug_manager_state_restore.h
4443
${NEO_SHARED_TEST_DIRECTORY}/common/helpers/unit_test_helper.h
4544
${NEO_SHARED_TEST_DIRECTORY}/common/helpers/unit_test_helper.inl
4645
)
4746

47+
if(MSVC OR COMPILER_SUPPORTS_SSE42) # add the SSE4 unit test only for MSVC or when the -msse4.2 compiler-flag check succeeded
48+
list(APPEND IGDRCL_SRCS_tests_helpers
49+
${CMAKE_CURRENT_SOURCE_DIR}/uint16_sse4_tests.cpp
50+
)
51+
endif() # NOTE(review): the MSVC branch does not look at NEO_TARGET_PROCESSOR - confirm this is intended for non-x86 MSVC builds
52+
4853
if(TESTS_XEHP_AND_LATER)
4954
list(APPEND IGDRCL_SRCS_tests_helpers
5055
${CMAKE_CURRENT_SOURCE_DIR}/aub_helper_hw_tests_xehp_and_later.cpp

opencl/test/unit_test/helpers/uint16_sse4_tests.cpp

Lines changed: 34 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -13,29 +13,29 @@
1313
using namespace NEO;
1414

1515
TEST(Uint16Sse4, GivenMaskWhenCastingToBoolThenTrueIsReturned) {
16-
EXPECT_TRUE(static_cast<bool>(uint16x8_t::mask()));
16+
EXPECT_TRUE(static_cast<bool>(NEO::uint16x8_t::mask()));
1717
}
1818

1919
TEST(Uint16Sse4, GivenZeroWhenCastingToBoolThenFalseIsReturned) {
20-
EXPECT_FALSE(static_cast<bool>(uint16x8_t::zero()));
20+
EXPECT_FALSE(static_cast<bool>(NEO::uint16x8_t::zero()));
2121
}
2222

2323
TEST(Uint16Sse4, WhenConjoiningMaskAndZeroThenBooleanResultIsCorrect) {
24-
EXPECT_TRUE(uint16x8_t::mask() && uint16x8_t::mask());
25-
EXPECT_FALSE(uint16x8_t::mask() && uint16x8_t::zero());
26-
EXPECT_FALSE(uint16x8_t::zero() && uint16x8_t::mask());
27-
EXPECT_FALSE(uint16x8_t::zero() && uint16x8_t::zero());
24+
EXPECT_TRUE(NEO::uint16x8_t::mask() && NEO::uint16x8_t::mask());
25+
EXPECT_FALSE(NEO::uint16x8_t::mask() && NEO::uint16x8_t::zero());
26+
EXPECT_FALSE(NEO::uint16x8_t::zero() && NEO::uint16x8_t::mask());
27+
EXPECT_FALSE(NEO::uint16x8_t::zero() && NEO::uint16x8_t::zero());
2828
}
2929

3030
TEST(Uint16Sse4, GivenOneWhenCreatingThenInstancesAreSame) {
31-
auto one = uint16x8_t::one();
32-
uint16x8_t alsoOne(one.value);
33-
EXPECT_EQ(0, memcmp(&alsoOne, &one, sizeof(uint16x8_t)));
31+
auto one = NEO::uint16x8_t::one();
32+
NEO::uint16x8_t alsoOne(one.value);
33+
EXPECT_EQ(0, memcmp(&alsoOne, &one, sizeof(NEO::uint16x8_t)));
3434
}
3535

3636
TEST(Uint16Sse4, GivenValueWhenCreatingThenConstructorIsReplicated) {
37-
uint16x8_t allSevens(7u);
38-
for (int i = 0; i < uint16x8_t::numChannels; ++i) {
37+
NEO::uint16x8_t allSevens(7u);
38+
for (int i = 0; i < NEO::uint16x8_t::numChannels; ++i) {
3939
EXPECT_EQ(7u, allSevens.get(i));
4040
}
4141
}
@@ -46,34 +46,34 @@ static const uint16_t laneValues[] = {
4646
16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31};
4747

4848
TEST(Uint16Sse4, GivenArrayWhenCreatingThenConstructorIsReplicated) {
49-
uint16x8_t lanes(laneValues);
50-
for (int i = 0; i < uint16x8_t::numChannels; ++i) {
49+
NEO::uint16x8_t lanes(laneValues);
50+
for (int i = 0; i < NEO::uint16x8_t::numChannels; ++i) {
5151
EXPECT_EQ(static_cast<uint16_t>(i), lanes.get(i));
5252
}
5353
}
5454

5555
TEST(Uint16Sse4, WhenLoadingThenValuesAreSetCorrectly) {
56-
uint16x8_t lanes;
56+
NEO::uint16x8_t lanes;
5757
lanes.load(laneValues);
58-
for (int i = 0; i < uint16x8_t::numChannels; ++i) {
58+
for (int i = 0; i < NEO::uint16x8_t::numChannels; ++i) {
5959
EXPECT_EQ(static_cast<uint16_t>(i), lanes.get(i));
6060
}
6161
}
6262

6363
TEST(Uint16Sse4, WhenLoadingUnalignedThenValuesAreSetCorrectly) {
64-
uint16x8_t lanes;
64+
NEO::uint16x8_t lanes;
6565
lanes.loadUnaligned(laneValues + 1);
66-
for (int i = 0; i < uint16x8_t::numChannels; ++i) {
66+
for (int i = 0; i < NEO::uint16x8_t::numChannels; ++i) {
6767
EXPECT_EQ(static_cast<uint16_t>(i + 1), lanes.get(i));
6868
}
6969
}
7070

7171
TEST(Uint16Sse4, WhenStoringThenValuesAreSetCorrectly) {
7272
uint16_t *alignedMemory = reinterpret_cast<uint16_t *>(alignedMalloc(1024, 32));
7373

74-
uint16x8_t lanes(laneValues);
74+
NEO::uint16x8_t lanes(laneValues);
7575
lanes.store(alignedMemory);
76-
for (int i = 0; i < uint16x8_t::numChannels; ++i) {
76+
for (int i = 0; i < NEO::uint16x8_t::numChannels; ++i) {
7777
EXPECT_EQ(static_cast<uint16_t>(i), alignedMemory[i]);
7878
}
7979

@@ -83,49 +83,49 @@ TEST(Uint16Sse4, WhenStoringThenValuesAreSetCorrectly) {
8383
TEST(Uint16Sse4, WhenStoringUnalignedThenValuesAreSetCorrectly) {
8484
uint16_t *alignedMemory = reinterpret_cast<uint16_t *>(alignedMalloc(1024, 32));
8585

86-
uint16x8_t lanes(laneValues);
86+
NEO::uint16x8_t lanes(laneValues);
8787
lanes.storeUnaligned(alignedMemory + 1);
88-
for (int i = 0; i < uint16x8_t::numChannels; ++i) {
88+
for (int i = 0; i < NEO::uint16x8_t::numChannels; ++i) {
8989
EXPECT_EQ(static_cast<uint16_t>(i), (alignedMemory + 1)[i]);
9090
}
9191

9292
alignedFree(alignedMemory);
9393
}
9494

9595
TEST(Uint16Sse4, WhenDecrementingThenValuesAreSetCorrectly) {
96-
uint16x8_t result(laneValues);
97-
result -= uint16x8_t::one();
96+
NEO::uint16x8_t result(laneValues);
97+
result -= NEO::uint16x8_t::one();
9898

99-
for (int i = 0; i < uint16x8_t::numChannels; ++i) {
99+
for (int i = 0; i < NEO::uint16x8_t::numChannels; ++i) {
100100
EXPECT_EQ(static_cast<uint16_t>(i - 1), result.get(i));
101101
}
102102
}
103103

104104
TEST(Uint16Sse4, WhenIncrementingThenValuesAreSetCorrectly) {
105-
uint16x8_t result(laneValues);
106-
result += uint16x8_t::one();
105+
NEO::uint16x8_t result(laneValues);
106+
result += NEO::uint16x8_t::one();
107107

108-
for (int i = 0; i < uint16x8_t::numChannels; ++i) {
108+
for (int i = 0; i < NEO::uint16x8_t::numChannels; ++i) {
109109
EXPECT_EQ(static_cast<uint16_t>(i + 1), result.get(i));
110110
}
111111
}
112112

113113
TEST(Uint16Sse4, WhenBlendingThenValuesAreSetCorrectly) {
114-
uint16x8_t a(uint16x8_t::one());
115-
uint16x8_t b(uint16x8_t::zero());
116-
uint16x8_t c;
114+
NEO::uint16x8_t a(NEO::uint16x8_t::one());
115+
NEO::uint16x8_t b(NEO::uint16x8_t::zero());
116+
NEO::uint16x8_t c;
117117

118118
// c = mask ? a : b
119-
c = blend(a, b, uint16x8_t::mask());
119+
c = blend(a, b, NEO::uint16x8_t::mask());
120120

121-
for (int i = 0; i < uint16x8_t::numChannels; ++i) {
121+
for (int i = 0; i < NEO::uint16x8_t::numChannels; ++i) {
122122
EXPECT_EQ(a.get(i), c.get(i));
123123
}
124124

125125
// c = mask ? a : b
126-
c = blend(a, b, uint16x8_t::zero());
126+
c = blend(a, b, NEO::uint16x8_t::zero());
127127

128-
for (int i = 0; i < uint16x8_t::numChannels; ++i) {
128+
for (int i = 0; i < NEO::uint16x8_t::numChannels; ++i) {
129129
EXPECT_EQ(b.get(i), c.get(i));
130130
}
131131
}

scripts/packaging/l0_gpu_driver/sles_15.2/SPECS/l0_gpu_driver.spec

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,7 @@ exposing hardware capabilities to applications.
6565
-DRELEASE_WITH_REGKEYS=TRUE \
6666
-DL0_INSTALL_UDEV_RULES=1 \
6767
-DUDEV_RULES_DIR=/etc/udev/rules.d/ \
68+
-DCMAKE_VERBOSE_MAKEFILE:BOOL=OFF \
6869
-Wno-dev
6970
%make_build
7071

scripts/packaging/opencl/sles_15.2/SPECS/opencl.spec

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,7 @@ Summary: ocloc package for opencl
5656
-DCMAKE_INSTALL_PREFIX=/usr \
5757
-DSKIP_UNIT_TESTS=1 \
5858
-DRELEASE_WITH_REGKEYS=1 \
59+
-DCMAKE_VERBOSE_MAKEFILE:BOOL=OFF \
5960
-Wno-dev
6061
%make_build
6162

shared/source/CMakeLists.txt

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -61,10 +61,14 @@ function(generate_shared_lib LIB_NAME MOCKABLE)
6161

6262
# Enable SSE4/AVX2 options for files that need them
6363
if(MSVC)
64-
set_source_files_properties(${CMAKE_CURRENT_SOURCE_DIR}/helpers/local_id_gen_avx2.cpp PROPERTIES COMPILE_FLAGS /arch:AVX2)
64+
set_source_files_properties(${CMAKE_CURRENT_SOURCE_DIR}/helpers/${NEO_TARGET_PROCESSOR}/local_id_gen_avx2.cpp PROPERTIES COMPILE_FLAGS /arch:AVX2)
6565
else()
66-
set_source_files_properties(${CMAKE_CURRENT_SOURCE_DIR}/helpers/local_id_gen_avx2.cpp PROPERTIES COMPILE_FLAGS -mavx2)
67-
set_source_files_properties(${CMAKE_CURRENT_SOURCE_DIR}/helpers/local_id_gen_sse4.cpp PROPERTIES COMPILE_FLAGS -msse4.2)
66+
if(COMPILER_SUPPORTS_AVX2)
67+
set_source_files_properties(${CMAKE_CURRENT_SOURCE_DIR}/helpers/${NEO_TARGET_PROCESSOR}/local_id_gen_avx2.cpp PROPERTIES COMPILE_FLAGS -mavx2)
68+
endif()
69+
if(COMPILER_SUPPORTS_SSE42)
70+
set_source_files_properties(${CMAKE_CURRENT_SOURCE_DIR}/helpers/local_id_gen_sse4.cpp PROPERTIES COMPILE_FLAGS -msse4.2)
71+
endif()
6872
endif()
6973

7074
endfunction()

0 commit comments

Comments
 (0)