diff --git a/include/cutlass/cutlass.h b/include/cutlass/cutlass.h index 2a900a19d..b29ab353b 100644 --- a/include/cutlass/cutlass.h +++ b/include/cutlass/cutlass.h @@ -46,11 +46,6 @@ #pragma once #include "cutlass/detail/helper_macros.hpp" - -#if defined(CUTLASS_ENABLE_SYCL) -#include "syclcompat.hpp" -#endif - #include //////////////////////////////////////////////////////////////////////////////////////////////////// diff --git a/include/cutlass/gpu_generics.h b/include/cutlass/gpu_generics.h index 6d9e152d5..e4efcb326 100644 --- a/include/cutlass/gpu_generics.h +++ b/include/cutlass/gpu_generics.h @@ -36,6 +36,11 @@ * frameworks such as CUDA and SYCL. */ +#if defined(CUTLASS_ENABLE_SYCL) +#include +#include +#endif + //////////////////////////////////////////////////////////////////////////////////////////////////// static const int NumThreadsPerWarp = 32; diff --git a/include/cutlass/matrix.h b/include/cutlass/matrix.h index ab32597e3..5d8ccb3c1 100644 --- a/include/cutlass/matrix.h +++ b/include/cutlass/matrix.h @@ -7825,7 +7825,7 @@ struct Matrix { Matrix m; - m.set_slice3x3({ + m.set_slice_3x3({ c + x * x * one_minus_cos, x * y * one_minus_cos - z * s, x * z * one_minus_cos + y * s, y * x * one_minus_cos * z * s, c + y * y * one_minus_cos, y * z * one_minus_cos - x * s, z * x * one_minus_cos - y * s, z * y * one_minus_cos + x * s, c + z * z * one_minus_cos @@ -7845,7 +7845,7 @@ struct Matrix { Matrix m = Matrix::identity(); - m.set_slice3x3({ + m.set_slice_3x3({ Element(1) - Element(2) * a * a, Element(-2) * a * b, Element(-2) * a * c, Element(-2) * a * b, Element(1) - Element(2) * b * b, Element(-2) * b * c, Element(-2) * a * c, Element(-2) * b * c, Element(1) - Element(2) * c * c @@ -14005,7 +14005,7 @@ struct Matrix { Matrix m; - m.set_slice3x3({ + m.set_slice_3x3({ c + x * x * one_minus_cos, x * y * one_minus_cos - z * s, x * z * one_minus_cos + y * s, y * x * one_minus_cos * z * s, c + y * y * one_minus_cos, y * z * one_minus_cos - x * s, z * x * one_minus_cos - y * s, z * y * one_minus_cos + x * s, c + z * z * one_minus_cos @@ -14025,7 +14025,7 @@ struct Matrix { Matrix m = Matrix::identity(); - m.set_slice3x3({ + m.set_slice_3x3({ Element(1) - Element(2) * a * a, Element(-2) * a * b, Element(-2) * a * c, Element(-2) * a * b, Element(1) - Element(2) * b * b, Element(-2) * b * c, Element(-2) * a * c, Element(-2) * b * c, Element(1) - Element(2) * c * c diff --git a/tools/util/include/cutlass/util/device_memory.h b/tools/util/include/cutlass/util/device_memory.h index 66262ac00..0b78640be 100644 --- a/tools/util/include/cutlass/util/device_memory.h +++ b/tools/util/include/cutlass/util/device_memory.h @@ -56,13 +56,14 @@ T* allocate(size_t count = 1) { T* ptr = 0; #if defined(CUTLASS_ENABLE_SYCL) - ptr = syclcompat::malloc(count); - if (ptr == nullptr) { - throw std::runtime_error("Failed to allocate memory"); + if (count > 0) { + ptr = reinterpret_cast(syclcompat::malloc(bytes)); + if ((void*)ptr == nullptr) { + throw std::runtime_error("Failed to allocate memory"); + } } #else size_t bytes = 0; - bytes = count * sizeof(T); cudaError_t cuda_error = cudaMalloc((void**)&ptr, bytes); @@ -78,7 +79,7 @@ T* allocate(size_t count = 1) { template void free(T* ptr) { #if defined(CUTLASS_ENABLE_SYCL) - syclcompat::free((void*)ptr); + syclcompat::free(ptr); if (ptr != nullptr) { throw std::runtime_error("Failed to free device memory"); }