Skip to content

Commit

Permalink
Added missing copy_from() in neon for vector_aligned
Browse files Browse the repository at this point in the history
  • Loading branch information
ldh4 committed Feb 8, 2024
1 parent 442e4d4 commit 1502379
Showing 1 changed file with 25 additions and 1 deletion.
26 changes: 25 additions & 1 deletion simd/src/Kokkos_SIMD_NEON.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -615,10 +615,18 @@ class simd<float, simd_abi::neon_fixed_size<2>> {
element_aligned_tag) {
m_value = vld1_f32(ptr);
}
// Fills this vector from the floats starting at `ptr`. The vector_aligned_tag
// overload issues the same vld1_f32 load as the element_aligned_tag overload.
KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION void copy_from(value_type const* ptr,
                                                     vector_aligned_tag) {
  float32x2_t const loaded = vld1_f32(ptr);
  m_value = loaded;
}
// Writes the lanes of this vector to memory starting at `ptr` using vst1_f32.
// The vector itself is not modified.
KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION void copy_to(
    value_type* ptr, element_aligned_tag) const {
  float32x2_t const lanes = m_value;
  vst1_f32(ptr, lanes);
}
// Writes the lanes of this vector to memory starting at `ptr`. The
// vector_aligned_tag overload issues the same vst1_f32 store as the
// element_aligned_tag overload.
KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION void copy_to(value_type* ptr,
                                                   vector_aligned_tag) const {
  float32x2_t const lanes = m_value;
  vst1_f32(ptr, lanes);
}
KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION constexpr explicit
operator float32x2_t() const {
return m_value;
Expand Down Expand Up @@ -1281,7 +1289,14 @@ class simd<std::uint64_t, simd_abi::neon_fixed_size<2>> {
vector_aligned_tag) {
m_value = vld1q_u64(ptr);
}

// Writes the lanes of this vector to memory starting at `ptr` using
// vst1q_u64. The vector itself is not modified.
KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION void copy_to(
    value_type* ptr, element_aligned_tag) const {
  uint64x2_t const lanes = m_value;
  vst1q_u64(ptr, lanes);
}
// Writes the lanes of this vector to memory starting at `ptr`. The
// vector_aligned_tag overload issues the same vst1q_u64 store as the
// element_aligned_tag overload.
KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION void copy_to(value_type* ptr,
                                                   vector_aligned_tag) const {
  uint64x2_t const lanes = m_value;
  vst1q_u64(ptr, lanes);
}
KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION constexpr explicit operator uint64x2_t()
const {
return m_value;
Expand Down Expand Up @@ -1513,6 +1528,11 @@ class const_where_expression<simd_mask<float, simd_abi::neon_fixed_size<2>>,
if (m_mask[1]) mem[1] = m_value[1];
}
// Masked store: each lane whose mask entry is set is written to the matching
// element of `mem`; elements for unset lanes are left as they were.
KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION
void copy_to(float* mem, vector_aligned_tag) const {
  // Lane 0.
  if (m_mask[0]) {
    mem[0] = m_value[0];
  }
  // Lane 1.
  if (m_mask[1]) {
    mem[1] = m_value[1];
  }
}
KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION
void scatter_to(
float* mem,
simd<std::int32_t, simd_abi::neon_fixed_size<2>> const& index) const {
Expand Down Expand Up @@ -1547,6 +1567,10 @@ class where_expression<simd_mask<float, simd_abi::neon_fixed_size<2>>,
if (m_mask[0]) m_value[0] = mem[0];
if (m_mask[1]) m_value[1] = mem[1];
}
void copy_from(float const* mem, vector_aligned_tag) {
if (m_mask[0]) m_value[0] = mem[0];
if (m_mask[1]) m_value[1] = mem[1];
}
KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION
void gather_from(
float const* mem,
Expand Down

0 comments on commit 1502379

Please sign in to comment.