-
Notifications
You must be signed in to change notification settings - Fork 794
[ESIMD] Allow full autodeduction for acc gather and slm_gather APIs accepting simd_view #13956
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -3623,6 +3623,92 @@ gather(AccessorT acc, OffsetSimdViewT byte_offsets, simd_mask<N / VS> mask, | |
| return gather<T, N, VS>(acc, byte_offsets.read(), mask, pass_thru, props); | ||
| } | ||
|
|
||
| /// template <int VS, typename T, int N, typename AccessorT, | ||
| /// typename OffsetSimdViewT, typename PropertyListT = empty_properties_t> | ||
| /// simd<T, N> gather(AccessorT acc, OffsetSimdViewT byte_offsets, | ||
| /// simd_mask<N / VS> mask, simd<T, N> pass_thru, | ||
| /// PropertyListT props = {}); | ||
| /// This function is identical to (lacc-ga-1) except that the \p byte_offsets | ||
| /// is represented as \c simd_view. | ||
| template < | ||
| int VS, typename T, int N, typename AccessorT, typename OffsetSimdViewT, | ||
| typename PropertyListT = ext::oneapi::experimental::empty_properties_t> | ||
| __ESIMD_API std::enable_if_t< | ||
| (detail::is_device_accessor_with_v<AccessorT, | ||
| detail::accessor_mode_cap::can_read> && | ||
| detail::is_simd_view_type_v<OffsetSimdViewT> && | ||
| ext::oneapi::experimental::is_property_list_v<PropertyListT>), | ||
| simd<T, N>> | ||
| gather(AccessorT acc, OffsetSimdViewT byte_offsets, simd_mask<N / VS> mask, | ||
| simd<T, N> pass_thru, PropertyListT props = {}) { | ||
| static_assert(N / VS == | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. We might want to make an internal tracker to add these asserts to the APIs we already changed |
||
| OffsetSimdViewT::getSizeX() * OffsetSimdViewT::getSizeY(), | ||
| "Size of pass_thru parameter must correspond to the size of " | ||
| "byte_offsets parameter."); | ||
| return gather<T, N, VS>(acc, byte_offsets.read(), mask, pass_thru, props); | ||
| } | ||
|
|
||
| /// template <int VS = 1, typename AccessorT, | ||
| /// typename OffsetSimdViewT, typename PassThruSimdViewT, | ||
| /// int N = PassThruSimdViewT::getSizeX() * PassThruSimdViewT::getSizeY(), | ||
| /// typename T = PassThruSimdViewT::value_type::element_type, | ||
| /// typename PropertyListT = ext::oneapi::experimental::empty_properties_t> | ||
| /// simd<T, N> gather(AccessorT acc, OffsetSimdViewT byte_offsets, | ||
| /// simd_mask<N / VS> mask, PassThruSimdViewT pass_thru, | ||
| /// PropertyListT props = {}); | ||
| /// This function is identical to (lacc-ga-1) except that the \p byte_offsets | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It looks like this comment isn't accurate for a few of the overloads
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Fixed some comments |
||
| /// and \p pass_thru are represented as \c simd_view. | ||
| template < | ||
| int VS = 1, typename AccessorT, typename OffsetSimdViewT, | ||
| typename PassThruSimdViewT, | ||
| int N = PassThruSimdViewT::getSizeX() * PassThruSimdViewT::getSizeY(), | ||
| typename T = PassThruSimdViewT::value_type::element_type, | ||
| typename PropertyListT = ext::oneapi::experimental::empty_properties_t> | ||
| __ESIMD_API std::enable_if_t< | ||
| (detail::is_device_accessor_with_v<AccessorT, | ||
| detail::accessor_mode_cap::can_read> && | ||
| detail::is_simd_view_type_v<OffsetSimdViewT> && | ||
| detail::is_simd_view_type_v<PassThruSimdViewT> && | ||
| ext::oneapi::experimental::is_property_list_v<PropertyListT>), | ||
| simd<T, N>> | ||
| gather(AccessorT acc, OffsetSimdViewT byte_offsets, simd_mask<N / VS> mask, | ||
| PassThruSimdViewT pass_thru, PropertyListT props = {}) { | ||
| static_assert(N / VS == | ||
| OffsetSimdViewT::getSizeX() * OffsetSimdViewT::getSizeY(), | ||
| "Size of pass_thru parameter must correspond to the size of " | ||
| "byte_offsets parameter."); | ||
| return gather<T, N, VS>(acc, byte_offsets.read(), mask, pass_thru.read(), | ||
| props); | ||
| } | ||
|
|
||
| /// template <int VS = 1, typename AccessorT, | ||
| /// typename OffsetT, typename PassThruSimdViewT, | ||
| /// int N = PassThruSimdViewT::getSizeX() * PassThruSimdViewT::getSizeY(), | ||
| /// typename T = PassThruSimdViewT::value_type::element_type, | ||
| /// typename PropertyListT = ext::oneapi::experimental::empty_properties_t> | ||
| /// simd<T, N> gather(AccessorT acc, simd<OffsetT, N / VS> byte_offsets, | ||
| /// simd_mask<N / VS> mask, PassThruSimdViewT pass_thru, | ||
| /// PropertyListT props = {}); | ||
| /// This function is identical to (lacc-ga-1) except that the \p pass_thru | ||
| /// is represented as \c simd_view. | ||
| template < | ||
| int VS = 1, typename AccessorT, typename OffsetT, | ||
| typename PassThruSimdViewT, | ||
| int N = PassThruSimdViewT::getSizeX() * PassThruSimdViewT::getSizeY(), | ||
| typename T = PassThruSimdViewT::value_type::element_type, | ||
| typename PropertyListT = ext::oneapi::experimental::empty_properties_t> | ||
| __ESIMD_API std::enable_if_t< | ||
| (detail::is_device_accessor_with_v<AccessorT, | ||
| detail::accessor_mode_cap::can_read> && | ||
| detail::is_simd_view_type_v<PassThruSimdViewT> && | ||
| ext::oneapi::experimental::is_property_list_v<PropertyListT>), | ||
| simd<T, N>> | ||
| gather(AccessorT acc, simd<OffsetT, N / VS> byte_offsets, | ||
| simd_mask<N / VS> mask, PassThruSimdViewT pass_thru, | ||
| PropertyListT props = {}) { | ||
| return gather<T, N, VS>(acc, byte_offsets, mask, pass_thru.read(), props); | ||
| } | ||
|
|
||
| /// template <typename T, int N, int VS = 1, typename AccessorT, | ||
| /// typename OffsetSimdViewT, | ||
| /// typename PropertyListT = empty_properties_t> | ||
|
|
@@ -4672,6 +4758,140 @@ slm_gather(OffsetSimdViewT byte_offsets, simd_mask<N / VS> mask, | |
| return slm_gather<T, N, VS>(byte_offsets.read(), mask, pass_thru, props); | ||
| } | ||
|
|
||
| /// template <int VS, typename T, int N, typename OffsetSimdViewT, | ||
| /// typename PropertyListT = empty_properties_t> | ||
| /// simd <T, N> slm_gather( | ||
| /// OffsetSimdViewT byte_offsets, | ||
| /// simd_mask<N / VS> mask, simd<T, N> pass_thru, | ||
| /// PropertyListT props = {}); | ||
| /// Variation of the API that allows using \c simd_view without specifying \c T | ||
| /// and \c N template parameters. | ||
| /// Loads ("gathers") elements of the type 'T' from Shared Local Memory | ||
| /// locations addressed by byte offsets \p byte_offsets, and returns the loaded | ||
| /// elements. Access to any element's memory location can be disabled via the | ||
| /// input vector of predicates \p mask. If mask[i] is unset, then the load from | ||
| /// (byte_offsets[i]) is skipped and the corresponding i-th element from | ||
| /// \p pass_thru operand is returned. | ||
| /// @tparam VS Vector size. It can also be read as the number of reads per each | ||
| /// address. The parameter 'N' must be divisible by 'VS'. (VS > 1) is supported | ||
| /// only on DG2 and PVC. | ||
| /// @param byte_offsets the vector of 32-bit offsets in bytes. | ||
| /// For each i, (byte_offsets[i]) must be element size aligned. | ||
| /// If the alignment property is not passed, then it is assumed that each | ||
| /// accessed address is aligned by element-size. | ||
| /// @param mask The access mask. | ||
| /// @param pass_thru The vector pass through values. | ||
| /// @param props The optional compile-time properties. Only 'alignment' | ||
| /// property is used. | ||
| /// @return A vector of elements read. | ||
| template < | ||
| int VS, typename T, int N, typename OffsetSimdViewT, | ||
| typename PropertyListT = ext::oneapi::experimental::empty_properties_t> | ||
| __ESIMD_API std::enable_if_t< | ||
| (detail::is_simd_view_type_v<OffsetSimdViewT> && | ||
| ext::oneapi::experimental::is_property_list_v<PropertyListT>), | ||
| simd<T, N>> | ||
| slm_gather(OffsetSimdViewT byte_offsets, simd_mask<N / VS> mask, | ||
| simd<T, N> pass_thru, PropertyListT props = {}) { | ||
| static_assert(N / VS == | ||
| OffsetSimdViewT::getSizeX() * OffsetSimdViewT::getSizeY(), | ||
| "Size of pass_thru parameter must correspond to the size of " | ||
| "byte_offsets parameter."); | ||
| return slm_gather<T, N, VS>(byte_offsets.read(), mask, pass_thru, props); | ||
| } | ||
|
|
||
| /// template <int VS = 1, | ||
| /// typename OffsetSimdViewT, typename PassThruSimdViewT, | ||
| /// int N = PassThruSimdViewT::getSizeX() * PassThruSimdViewT::getSizeY(), | ||
| /// typename T = PassThruSimdViewT::value_type::element_type, | ||
| /// typename PropertyListT = ext::oneapi::experimental::empty_properties_t> | ||
| /// simd <T, N> slm_gather( | ||
| /// OffsetSimdViewT byte_offsets, | ||
| /// simd_mask<N / VS> mask, PassThruSimdViewT pass_thru, | ||
| /// PropertyListT props = {}); | ||
| /// Variation of the API that allows using \c simd_view without specifying \c T | ||
| /// and \c N template parameters. | ||
| /// Loads ("gathers") elements of the type 'T' from Shared Local Memory | ||
| /// locations addressed by byte offsets \p byte_offsets, and returns the loaded | ||
| /// elements. Access to any element's memory location can be disabled via the | ||
| /// input vector of predicates \p mask. If mask[i] is unset, then the load from | ||
| /// (byte_offsets[i]) is skipped and the corresponding i-th element from | ||
| /// \p pass_thru operand is returned. | ||
| /// @tparam VS Vector size. It can also be read as the number of reads per each | ||
| /// address. The parameter 'N' must be divisible by 'VS'. (VS > 1) is supported | ||
| /// only on DG2 and PVC. | ||
| /// @param byte_offsets the vector of 32-bit offsets in bytes. | ||
| /// For each i, (byte_offsets[i]) must be element size aligned. | ||
| /// If the alignment property is not passed, then it is assumed that each | ||
| /// accessed address is aligned by element-size. | ||
| /// @param mask The access mask. | ||
| /// @param pass_thru The vector pass through values. | ||
| /// @param props The optional compile-time properties. Only 'alignment' | ||
| /// property is used. | ||
| /// @return A vector of elements read. | ||
| template < | ||
| int VS = 1, typename OffsetSimdViewT, typename PassThruSimdViewT, | ||
| int N = PassThruSimdViewT::getSizeX() * PassThruSimdViewT::getSizeY(), | ||
| typename T = PassThruSimdViewT::value_type::element_type, | ||
| typename PropertyListT = ext::oneapi::experimental::empty_properties_t> | ||
| __ESIMD_API std::enable_if_t< | ||
| (detail::is_simd_view_type_v<OffsetSimdViewT> && | ||
| detail::is_simd_view_type_v<PassThruSimdViewT> && | ||
| ext::oneapi::experimental::is_property_list_v<PropertyListT>), | ||
| simd<T, N>> | ||
| slm_gather(OffsetSimdViewT byte_offsets, simd_mask<N / VS> mask, | ||
| PassThruSimdViewT pass_thru, PropertyListT props = {}) { | ||
| static_assert(N / VS == | ||
| OffsetSimdViewT::getSizeX() * OffsetSimdViewT::getSizeY(), | ||
| "Size of pass_thru parameter must correspond to the size of " | ||
| "byte_offsets parameter."); | ||
| return slm_gather<T, N, VS>(byte_offsets.read(), mask, pass_thru.read(), | ||
| props); | ||
| } | ||
|
|
||
| /// template <int VS = 1, | ||
| /// typename PassThruSimdViewT, | ||
| /// int N = PassThruSimdViewT::getSizeX() * PassThruSimdViewT::getSizeY(), | ||
| /// typename T = PassThruSimdViewT::value_type::element_type, | ||
| /// typename PropertyListT = ext::oneapi::experimental::empty_properties_t> | ||
| /// simd <T, N> slm_gather( | ||
| /// simd<uint32_t, N / VS> byte_offsets, | ||
| /// simd_mask<N / VS> mask, PassThruSimdViewT pass_thru, | ||
| /// PropertyListT props = {}); | ||
| /// Variation of the API that allows using \c simd_view without specifying \c T | ||
| /// and \c N template parameters. | ||
| /// Loads ("gathers") elements of the type 'T' from Shared Local Memory | ||
| /// locations addressed by byte offsets \p byte_offsets, and returns the loaded | ||
| /// elements. Access to any element's memory location can be disabled via the | ||
| /// input vector of predicates \p mask. If mask[i] is unset, then the load from | ||
| /// (byte_offsets[i]) is skipped and the corresponding i-th element from | ||
| /// \p pass_thru operand is returned. | ||
| /// @tparam VS Vector size. It can also be read as the number of reads per each | ||
| /// address. The parameter 'N' must be divisible by 'VS'. (VS > 1) is supported | ||
| /// only on DG2 and PVC. | ||
| /// @param byte_offsets the vector of 32-bit offsets in bytes. | ||
| /// For each i, (byte_offsets[i]) must be element size aligned. | ||
| /// If the alignment property is not passed, then it is assumed that each | ||
| /// accessed address is aligned by element-size. | ||
| /// @param mask The access mask. | ||
| /// @param pass_thru The vector pass through values. | ||
| /// @param props The optional compile-time properties. Only 'alignment' | ||
| /// property is used. | ||
| /// @return A vector of elements read. | ||
| template < | ||
| int VS = 1, typename PassThruSimdViewT, | ||
| int N = PassThruSimdViewT::getSizeX() * PassThruSimdViewT::getSizeY(), | ||
| typename T = PassThruSimdViewT::value_type::element_type, | ||
| typename PropertyListT = ext::oneapi::experimental::empty_properties_t> | ||
| __ESIMD_API std::enable_if_t< | ||
| (detail::is_simd_view_type_v<PassThruSimdViewT> && | ||
| ext::oneapi::experimental::is_property_list_v<PropertyListT>), | ||
| simd<T, N>> | ||
| slm_gather(simd<uint32_t, N / VS> byte_offsets, simd_mask<N / VS> mask, | ||
| PassThruSimdViewT pass_thru, PropertyListT props = {}) { | ||
| return slm_gather<T, N, VS>(byte_offsets, mask, pass_thru.read(), props); | ||
| } | ||
|
|
||
| /// simd <T, N> slm_gather( | ||
| /// OffsetSimdViewT byte_offsets, | ||
| /// simd_mask<N / VS> mask, PropertyListT props = {}); // (slm-ga-8) | ||
|
|
@@ -8420,6 +8640,91 @@ gather(AccessorT acc, OffsetSimdViewT byte_offsets, simd_mask<N / VS> mask, | |
| return gather<T, N, VS>(acc, byte_offsets.read(), mask, pass_thru, props); | ||
| } | ||
|
|
||
| /// template <int VS, typename T, int N, typename AccessorT, | ||
| /// typename OffsetSimdViewT, typename PropertyListT = empty_properties_t> | ||
| /// simd<T, N> gather(AccessorT acc, OffsetSimdViewT byte_offsets, | ||
| /// simd_mask<N / VS> mask, simd<T, N> pass_thru, | ||
| /// PropertyListT props = {}); | ||
| /// This function is identical to (lacc-ga-1) except that the \p byte_offsets | ||
| /// is represented as \c simd_view. | ||
| template < | ||
| int VS, typename T, int N, typename AccessorT, typename OffsetSimdViewT, | ||
| typename PropertyListT = ext::oneapi::experimental::empty_properties_t> | ||
| __ESIMD_API std::enable_if_t< | ||
| (detail::is_local_accessor_with_v<AccessorT, | ||
| detail::accessor_mode_cap::can_read> && | ||
| detail::is_simd_view_type_v<OffsetSimdViewT> && | ||
| ext::oneapi::experimental::is_property_list_v<PropertyListT>), | ||
| simd<T, N>> | ||
| gather(AccessorT acc, OffsetSimdViewT byte_offsets, simd_mask<N / VS> mask, | ||
| simd<T, N> pass_thru, PropertyListT props = {}) { | ||
| static_assert(N / VS == | ||
| OffsetSimdViewT::getSizeX() * OffsetSimdViewT::getSizeY(), | ||
| "Size of pass_thru parameter must correspond to the size of " | ||
| "byte_offsets parameter."); | ||
| return gather<T, N, VS>(acc, byte_offsets.read(), mask, pass_thru, props); | ||
| } | ||
|
|
||
| /// template <int VS = 1, typename AccessorT, | ||
| /// typename OffsetSimdViewT, typename PassThruSimdViewT, | ||
| /// int N = PassThruSimdViewT::getSizeX() * PassThruSimdViewT::getSizeY(), | ||
| /// typename T = PassThruSimdViewT::value_type::element_type, | ||
| /// typename PropertyListT = ext::oneapi::experimental::empty_properties_t> | ||
| /// simd<T, N> gather(AccessorT acc, OffsetSimdViewT byte_offsets, | ||
| /// simd_mask<N / VS> mask, PassThruSimdViewT pass_thru, | ||
| /// PropertyListT props = {}); | ||
| /// This function is identical to (lacc-ga-1) except that the \p byte_offsets | ||
| /// and \p pass_thru are represented as \c simd_view. | ||
| template < | ||
| int VS = 1, typename AccessorT, typename OffsetSimdViewT, | ||
| typename PassThruSimdViewT, | ||
| int N = PassThruSimdViewT::getSizeX() * PassThruSimdViewT::getSizeY(), | ||
| typename T = PassThruSimdViewT::value_type::element_type, | ||
| typename PropertyListT = ext::oneapi::experimental::empty_properties_t> | ||
| __ESIMD_API std::enable_if_t< | ||
| (detail::is_local_accessor_with_v<AccessorT, | ||
| detail::accessor_mode_cap::can_read> && | ||
| detail::is_simd_view_type_v<OffsetSimdViewT> && | ||
| detail::is_simd_view_type_v<PassThruSimdViewT> && | ||
| ext::oneapi::experimental::is_property_list_v<PropertyListT>), | ||
| simd<T, N>> | ||
| gather(AccessorT acc, OffsetSimdViewT byte_offsets, simd_mask<N / VS> mask, | ||
| PassThruSimdViewT pass_thru, PropertyListT props = {}) { | ||
| static_assert(N / VS == | ||
| OffsetSimdViewT::getSizeX() * OffsetSimdViewT::getSizeY(), | ||
| "Size of pass_thru parameter must correspond to the size of " | ||
| "byte_offsets parameter."); | ||
| return gather<T, N, VS>(acc, byte_offsets.read(), mask, pass_thru.read(), | ||
| props); | ||
| } | ||
|
|
||
| /// template <int VS = 1, typename AccessorT, | ||
| /// typename PassThruSimdViewT, | ||
| /// int N = PassThruSimdViewT::getSizeX() * PassThruSimdViewT::getSizeY(), | ||
| /// typename T = PassThruSimdViewT::value_type::element_type, | ||
| /// typename PropertyListT = ext::oneapi::experimental::empty_properties_t> | ||
| /// simd<T, N> gather(AccessorT acc, simd<uint32_t, N / VS> byte_offsets, | ||
| /// simd_mask<N / VS> mask, PassThruSimdViewT pass_thru, | ||
| /// PropertyListT props = {}); | ||
| /// This function is identical to (lacc-ga-1) except that the \p pass_thru | ||
| /// is represented as \c simd_view. | ||
| template < | ||
| int VS = 1, typename AccessorT, typename PassThruSimdViewT, | ||
| int N = PassThruSimdViewT::getSizeX() * PassThruSimdViewT::getSizeY(), | ||
| typename T = PassThruSimdViewT::value_type::element_type, | ||
| typename PropertyListT = ext::oneapi::experimental::empty_properties_t> | ||
| __ESIMD_API std::enable_if_t< | ||
| (detail::is_local_accessor_with_v<AccessorT, | ||
| detail::accessor_mode_cap::can_read> && | ||
| detail::is_simd_view_type_v<PassThruSimdViewT> && | ||
| ext::oneapi::experimental::is_property_list_v<PropertyListT>), | ||
| simd<T, N>> | ||
| gather(AccessorT acc, simd<uint32_t, N / VS> byte_offsets, | ||
| simd_mask<N / VS> mask, PassThruSimdViewT pass_thru, | ||
| PropertyListT props = {}) { | ||
| return gather<T, N, VS>(acc, byte_offsets, mask, pass_thru.read(), props); | ||
| } | ||
|
|
||
| /// template <typename T, int N, int VS = 1, typename AccessorT, | ||
| /// typename OffsetSimdViewT, | ||
| /// typename PropertyListT = empty_properties_t> | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
We don't need to compute N here?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
In this case no, since it has a pass_thru parameter of simd<T,N> type and can deduce N from there
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think in such cases it is useful to have a static_assert verifying that N from pass_thru and N from simd_view object are identical.
It would be much friendlier than seeing a long list consisting of 1 error + long list of notes explaining why the call could not be resolved.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Added static_asserts where possible