Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Related-To: NEO-6452 Signed-off-by: Sebastian Luzynski <sebastian.jozef.luzynski@intel.com>
- Loading branch information
1 parent
c7d8915
commit cf90603
Showing
16 changed files
with
445 additions
and
27 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,17 @@ | ||
/* | ||
* Copyright (C) 2022 Intel Corporation | ||
* | ||
* SPDX-License-Identifier: MIT | ||
* | ||
*/ | ||
|
||
#include "shared/source/helpers/aarch64/uint16_neon.h" | ||
#include "shared/source/helpers/local_id_gen.inl" | ||
|
||
#include <array> | ||
|
||
namespace NEO { | ||
// Explicit instantiations of generateLocalIDsSimd for the NEON-backed
// uint16x16_t vector type, one per value of the second template argument
// (8, 16 and 32 - presumably the SIMD width; confirm against local_id_gen.inl).
template void generateLocalIDsSimd<uint16x16_t, 8>(
    void *b,
    const std::array<uint16_t, 3> &localWorkgroupSize,
    uint16_t threadsPerWorkGroup,
    const std::array<uint8_t, 3> &dimensionsOrder,
    bool chooseMaxRowSize);

template void generateLocalIDsSimd<uint16x16_t, 16>(
    void *b,
    const std::array<uint16_t, 3> &localWorkgroupSize,
    uint16_t threadsPerWorkGroup,
    const std::array<uint8_t, 3> &dimensionsOrder,
    bool chooseMaxRowSize);

template void generateLocalIDsSimd<uint16x16_t, 32>(
    void *b,
    const std::array<uint16_t, 3> &localWorkgroupSize,
    uint16_t threadsPerWorkGroup,
    const std::array<uint8_t, 3> &dimensionsOrder,
    bool chooseMaxRowSize);
} // namespace NEO |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,173 @@ | ||
/* | ||
* Copyright (C) 2022 Intel Corporation | ||
* | ||
* SPDX-License-Identifier: MIT | ||
* | ||
*/ | ||
|
||
#pragma once | ||
#include "shared/source/helpers/aligned_memory.h" | ||
#include "shared/source/helpers/debug_helpers.h" | ||
|
||
#include <arm_neon.h> | ||
#include <cstdint> | ||
|
||
namespace NEO { | ||
|
||
struct uint16x16_t { | ||
enum { numChannels = 16 }; | ||
|
||
uint16x8x2_t value; | ||
|
||
uint16x16_t() { | ||
value.val[0] = vdupq_n_u16(0); | ||
value.val[1] = vdupq_n_u16(0); | ||
} | ||
|
||
uint16x16_t(uint16x8_t lo, uint16x8_t hi) { | ||
value.val[0] = lo; | ||
value.val[1] = hi; | ||
} | ||
|
||
uint16x16_t(uint16_t a) { | ||
value.val[0] = vdupq_n_u16(a); | ||
value.val[1] = vdupq_n_u16(a); | ||
} | ||
|
||
explicit uint16x16_t(const void *alignedPtr) { | ||
load(alignedPtr); | ||
} | ||
|
||
inline uint16_t get(unsigned int element) { | ||
DEBUG_BREAK_IF(element >= numChannels); | ||
uint16_t result; | ||
// vgetq_lane requires constant immediate | ||
switch (element) { | ||
case 0: | ||
result = vgetq_lane_u16(value.val[0], 0); | ||
break; | ||
case 1: | ||
result = vgetq_lane_u16(value.val[0], 1); | ||
break; | ||
case 2: | ||
result = vgetq_lane_u16(value.val[0], 2); | ||
break; | ||
case 3: | ||
result = vgetq_lane_u16(value.val[0], 3); | ||
break; | ||
case 4: | ||
result = vgetq_lane_u16(value.val[0], 4); | ||
break; | ||
case 5: | ||
result = vgetq_lane_u16(value.val[0], 5); | ||
break; | ||
case 6: | ||
result = vgetq_lane_u16(value.val[0], 6); | ||
break; | ||
case 7: | ||
result = vgetq_lane_u16(value.val[0], 7); | ||
break; | ||
case 8: | ||
result = vgetq_lane_u16(value.val[1], 0); | ||
break; | ||
case 9: | ||
result = vgetq_lane_u16(value.val[1], 1); | ||
break; | ||
case 10: | ||
result = vgetq_lane_u16(value.val[1], 2); | ||
break; | ||
case 11: | ||
result = vgetq_lane_u16(value.val[1], 3); | ||
break; | ||
case 12: | ||
result = vgetq_lane_u16(value.val[1], 4); | ||
break; | ||
case 13: | ||
result = vgetq_lane_u16(value.val[1], 5); | ||
break; | ||
case 14: | ||
result = vgetq_lane_u16(value.val[1], 6); | ||
break; | ||
case 15: | ||
result = vgetq_lane_u16(value.val[1], 7); | ||
break; | ||
} | ||
|
||
return result; | ||
} | ||
|
||
static inline uint16x16_t zero() { | ||
return uint16x16_t(static_cast<uint16_t>(0u)); | ||
} | ||
|
||
static inline uint16x16_t one() { | ||
return uint16x16_t(static_cast<uint16_t>(1u)); | ||
} | ||
|
||
static inline uint16x16_t mask() { | ||
return uint16x16_t(static_cast<uint16_t>(0xffffu)); | ||
} | ||
|
||
inline void load(const void *alignedPtr) { | ||
DEBUG_BREAK_IF(!isAligned<32>(alignedPtr)); | ||
value = vld1q_u16_x2(reinterpret_cast<const uint16_t *>(alignedPtr)); | ||
} | ||
|
||
inline void store(void *alignedPtr) { | ||
DEBUG_BREAK_IF(!isAligned<32>(alignedPtr)); | ||
vst1q_u16_x2(reinterpret_cast<uint16_t *>(alignedPtr), value); | ||
} | ||
|
||
inline operator bool() const { | ||
uint64x2_t hi = vreinterpretq_u64_u16(value.val[0]); | ||
uint64x2_t lo = vreinterpretq_u64_u16(value.val[1]); | ||
uint64x2_t tmp = vorrq_u64(hi, lo); | ||
uint64_t result = vget_lane_u64(vorr_u64(vget_high_u64(tmp), vget_low_u64(tmp)), 0); | ||
|
||
return result; | ||
} | ||
|
||
inline uint16x16_t &operator-=(const uint16x16_t &a) { | ||
value.val[0] = vsubq_u16(value.val[0], a.value.val[0]); | ||
value.val[1] = vsubq_u16(value.val[1], a.value.val[1]); | ||
|
||
return *this; | ||
} | ||
|
||
inline uint16x16_t &operator+=(const uint16x16_t &a) { | ||
value.val[0] = vaddq_u16(value.val[0], a.value.val[0]); | ||
value.val[1] = vaddq_u16(value.val[1], a.value.val[1]); | ||
|
||
return *this; | ||
} | ||
|
||
inline friend uint16x16_t operator>=(const uint16x16_t &a, const uint16x16_t &b) { | ||
uint16x16_t result; | ||
|
||
result.value.val[0] = veorq_u16(mask().value.val[0], | ||
vcgtq_u16(b.value.val[0], a.value.val[0])); | ||
result.value.val[1] = veorq_u16(mask().value.val[1], | ||
vcgtq_u16(b.value.val[1], a.value.val[1])); | ||
return result; | ||
} | ||
|
||
inline friend uint16x16_t operator&&(const uint16x16_t &a, const uint16x16_t &b) { | ||
uint16x16_t result; | ||
|
||
result.value.val[0] = vandq_u16(a.value.val[0], b.value.val[0]); | ||
result.value.val[1] = vandq_u16(a.value.val[1], b.value.val[1]); | ||
|
||
return result; | ||
} | ||
|
||
// NOTE: uint16x16_t::blend behaves like mask ? a : b | ||
inline friend uint16x16_t blend(const uint16x16_t &a, const uint16x16_t &b, const uint16x16_t &mask) { | ||
uint16x16_t result; | ||
|
||
result.value.val[0] = vbslq_u16(mask.value.val[0], a.value.val[0], b.value.val[0]); | ||
result.value.val[1] = vbslq_u16(mask.value.val[1], a.value.val[1], b.value.val[1]); | ||
|
||
return result; | ||
} | ||
}; | ||
} // namespace NEO |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,13 +1,18 @@ | ||
/* | ||
* Copyright (C) 2021 Intel Corporation | ||
* Copyright (C) 2021-2022 Intel Corporation | ||
* | ||
* SPDX-License-Identifier: MIT | ||
* | ||
*/ | ||
|
||
#include "shared/source/utilities/cpu_info.h" | ||
|
||
#include <asm/hwcap.h> | ||
|
||
namespace NEO { | ||
// Detects CPU capabilities and records them in `features`.
// Sets featureNeon when the HWCAP_ASIMD bit is present in the first word
// filled in by cpuid(); otherwise ORs in featureNone.
// NOTE(review): presumably cpuid() reports the hwcap bits on this platform -
// confirm against its AArch64 implementation.
void CpuInfo::detect() const {
    uint32_t capabilityWords[4] = {};
    cpuid(capabilityWords, 0u);

    const bool hasAdvancedSimd = (capabilityWords[0] & HWCAP_ASIMD) != 0;
    features |= hasAdvancedSimd ? featureNeon : featureNone;
}
} // namespace NEO |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.