Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Include changes for Intel PVC pipeline #51

Merged
Show file tree
Hide file tree
Changes from 14 commits
Commits
Show all changes
21 commits
Select commit Hold shift + click to select a range
3e063e4
Include changes for Intel PVC pipeline
muhammad-tanvir-1211 Apr 26, 2024
2dcc6cf
Fixed block size calculation
muhammad-tanvir-1211 Apr 26, 2024
e97a9ee
Reverted config.hpp changes
muhammad-tanvir-1211 Apr 26, 2024
d52f9a7
Move vector width calculation outside operator()
muhammad-tanvir-1211 Apr 29, 2024
2065f67
Add get_pvc_tensor method interface to copy traits
muhammad-tanvir-1211 Apr 29, 2024
b53ddde
Merge copy atom changes
muhammad-tanvir-1211 Apr 30, 2024
73849ba
Pipeline running with new copy atoms
muhammad-tanvir-1211 Apr 30, 2024
2115cf5
Fix bug in GEMM computation
muhammad-tanvir-1211 Apr 30, 2024
a8448ec
Fix register spill to improve performance
muhammad-tanvir-1211 May 6, 2024
7720fc5
Removed static_cast from coordinates
muhammad-tanvir-1211 May 6, 2024
b441aa4
Fix variable naming
muhammad-tanvir-1211 May 6, 2024
c100b45
Merge branch 'sycl-develop' of https://github.com/codeplaysoftware/cu…
muhammad-tanvir-1211 May 6, 2024
d2950df
Merge branch 'sycl-develop' of https://github.com/codeplaysoftware/cu…
muhammad-tanvir-1211 May 8, 2024
065a64a
Minor code changes
muhammad-tanvir-1211 May 9, 2024
fdb27b9
Merge branch 'sycl-develop' of https://github.com/codeplaysoftware/cu…
muhammad-tanvir-1211 May 9, 2024
a4cb592
Remove extra lines and add inline
muhammad-tanvir-1211 May 9, 2024
3b6f36e
Remove commented code
muhammad-tanvir-1211 May 9, 2024
028306f
Change register data type and fix typo
muhammad-tanvir-1211 May 10, 2024
c3ec983
Merge branch 'sycl-develop' of https://github.com/codeplaysoftware/cu…
muhammad-tanvir-1211 May 15, 2024
ae6c2ab
Pass sg_size for Intel PVC
muhammad-tanvir-1211 May 15, 2024
07b76e4
Change ifdef statements
muhammad-tanvir-1211 May 15, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 22 additions & 4 deletions include/cute/atom/copy_traits_xe.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -53,8 +53,17 @@ struct XE_2D_LD_Unpack
static_assert(is_rmem<TD>::value);
int H = size<0>(traits.tensor);
int W = size<1>(traits.tensor) * sizeof(typename Copy_Traits::CopyInternalType);
auto [y, x] = src.data().coord_;
CopyOp::copy(traits.tensor.data().get(), W, H, W, intel::coord_t{static_cast<int>(x), static_cast<int>(y)}, &*dst.data());
auto [y, x, z] = src.data().coord_;
CopyOp::copy(traits.tensor.data() + z, W, H, W, intel::coord_t{x, y}, &*dst.data());
}

template <class GCoord, class GShape, class GStride>
CUTE_HOST_DEVICE constexpr auto
get_pvc_tensor(GCoord const& coord, GShape const& shape, GStride const& stride_mul) const
{
return make_tensor(make_inttuple_iter(coord),
make_layout(make_shape(_1{}, get<0>(shape), get<1>(shape), get<2>(shape)),
make_stride(_1{}, E<0>{} * get<0>(stride_mul), E<1>{} * get<1>(stride_mul), E<2>{} * get<2>(stride(tensor)))));
}
};

Expand Down Expand Up @@ -274,8 +283,17 @@ struct XE_2D_ST_Unpack
static_assert(is_rmem<TS>::value);
int H = size<0>(traits.tensor);
int W = size<1>(traits.tensor) * sizeof(typename Copy_Traits::CopyInternalType);
auto [y, x] = dst.data().coord_;
CopyOp::copy(traits.tensor.data().get(), W, H, W, intel::coord_t{static_cast<int>(x), static_cast<int>(y)}, &*src.data());
auto [y, x, z] = dst.data().coord_;
CopyOp::copy(traits.tensor.data() + z, W, H, W, intel::coord_t{x, y}, &*src.data());
}

template <class GCoord, class GShape, class GStride>
CUTE_HOST_DEVICE constexpr auto
get_pvc_tensor(GCoord const& coord, GShape const& shape, GStride const& stride_mul) const
{
return make_tensor(make_inttuple_iter(coord),
make_layout(make_shape(_1{}, get<0>(shape), get<1>(shape), get<2>(shape)),
make_stride(_1{}, E<0>{} * get<0>(stride_mul), E<1>{} * get<1>(stride_mul), E<2>{} * get<2>(stride(tensor)))));
}
};

Expand Down
4 changes: 4 additions & 0 deletions include/cute/atom/mma_atom.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,10 @@
#include <cute/tensor.hpp>
#include <cute/util/type_traits.hpp>

#if defined(CUTLASS_ENABLE_SYCL)
#include <cute/atom/mma_traits_xe.hpp>
#endif

namespace cute {

template <class... Args>
Expand Down
4 changes: 2 additions & 2 deletions include/cute/atom/mma_traits_xe.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -41,8 +41,8 @@ template <>
struct MMA_Traits<XE_8x16x16_BF16BF16F32F32_NN>
{
using ValTypeD = float;
using ValTypeA = sycl::ext::oneapi::bfloat16;
using ValTypeB = sycl::ext::oneapi::bfloat16;
using ValTypeA = bfloat16_t;
using ValTypeB = bfloat16_t;
using ValTypeC = float;

using Shape_MNK = Shape<_8,_16,_16>;
Expand Down
2 changes: 1 addition & 1 deletion include/cute/config.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@
# define CUTE_HOST_DEVICE __forceinline__ __host__ __device__
# define CUTE_DEVICE __forceinline__ __device__
# define CUTE_HOST __forceinline__ __host__
#elif defined(__SYCL_CUDA_ARCH__)
#elif defined(__SYCL_DEVICE_ONLY__)
# define CUTE_HOST_DEVICE __attribute__((always_inline))
mehdi-goli marked this conversation as resolved.
Show resolved Hide resolved
# define CUTE_DEVICE __attribute__((always_inline))
muhammad-tanvir-1211 marked this conversation as resolved.
Show resolved Hide resolved
# define CUTE_HOST inline
Expand Down
2 changes: 2 additions & 0 deletions include/cute/tensor.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,8 @@
#include <cute/pointer.hpp>
#include <cute/layout.hpp>

#include <iomanip>
muhammad-tanvir-1211 marked this conversation as resolved.
Show resolved Hide resolved

namespace cute
{

Expand Down
8 changes: 8 additions & 0 deletions include/cutlass/arch/arch.h
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,14 @@ struct Sm90 {
static int const kMinComputeCapability = 90;
};

#if defined(CUTLASS_ENABLE_SYCL)

muhammad-tanvir-1211 marked this conversation as resolved.
Show resolved Hide resolved
struct IntelPVC {
static int const kMinComputeCapability = 0;
};

#endif

/// Triggers a breakpoint on the device
CUTLASS_DEVICE
void device_breakpoint() {
Expand Down
4 changes: 4 additions & 0 deletions include/cutlass/gemm/collective/collective_mma.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -75,4 +75,8 @@ struct CollectiveMma {
#include "cutlass/gemm/collective/sm90_mma_tma_gmma_ss_warpspecialized.hpp"
#include "cutlass/gemm/collective/sm90_mma_array_tma_gmma_ss_warpspecialized.hpp"
#include "cutlass/gemm/collective/sm90_mma_tma_gmma_ss_warpspecialized_fp8.hpp"

#if defined(CUTLASS_ENABLE_SYCL)
#include "cutlass/gemm/collective/intel_pvc_mma.hpp"
#endif
/////////////////////////////////////////////////////////////////////////////////////////////////
Loading