diff --git a/benchmarks/gemm/benchmark_runner.hpp b/benchmarks/gemm/benchmark_runner.hpp index c7ea70cafa..5576411220 100644 --- a/benchmarks/gemm/benchmark_runner.hpp +++ b/benchmarks/gemm/benchmark_runner.hpp @@ -175,12 +175,12 @@ struct BenchmarkRunnerGemm { using CollectiveMainloop = typename Gemm::GemmKernel::CollectiveMainloop; using DispatchPolicy = typename CollectiveMainloop::DispatchPolicy; - using ElementMma = CollectiveMainloop::TiledMma::ValTypeA; + using ElementMma = typename CollectiveMainloop::TiledMma::ValTypeA; - using ElementScale = ScaleType::type; - using ElementZero = ZeroType::type; - using StrideS = ScaleStride::type; - using StrideZ = ZeroStride::type; + using ElementScale = typename ScaleType::type; + using ElementZero = typename ZeroType::type; + using StrideS = typename ScaleStride::type; + using StrideZ = typename ZeroStride::type; using CollectiveEpilogue = typename Gemm::CollectiveEpilogue; using ElementC = typename Gemm::ElementC; @@ -453,7 +453,10 @@ struct BenchmarkRunnerGemm { } bool verify(const ProblemShapeType& problem_size, ElementCompute alpha, ElementCompute beta) { - auto [M, N, K, L] = problem_size; + auto& M = cute::get<0>(problem_size); + auto& N = cute::get<1>(problem_size); + auto& K = cute::get<2>(problem_size); + auto& L = cute::get<3>(problem_size); TensorRef ref_C(block_C[0].get(), LayoutC::packed({M, N})); TensorRef ref_D(block_ref_D.get(), LayoutD::packed({M, N})); diff --git a/cmake/FindDPCPP.cmake b/cmake/FindDPCPP.cmake index 9f45285cdc..9acaa49f18 100644 --- a/cmake/FindDPCPP.cmake +++ b/cmake/FindDPCPP.cmake @@ -40,6 +40,7 @@ add_library(DPCPP::DPCPP INTERFACE IMPORTED) set(DPCPP_FLAGS "-fsycl;") set(DPCPP_COMPILE_ONLY_FLAGS "") +set(DPCPP_LINK_ONLY_FLAGS "") if(NOT "${DPCPP_SYCL_TARGET}" STREQUAL "") list(APPEND DPCPP_FLAGS "-fsycl-targets=${DPCPP_SYCL_TARGET};") @@ -63,10 +64,10 @@ if("${DPCPP_SYCL_TARGET}" STREQUAL "intel_gpu_pvc" OR "${DPCPP_SYCL_TARGET}" STREQUAL "spir64" OR "${DPCPP_SYCL_TARGET}" STREQUAL "intel_gpu_bmg_g21") if ((CMAKE_CXX_COMPILER_ID MATCHES "IntelLLVM" AND CMAKE_CXX_COMPILER_VERSION VERSION_LESS 2025.2) OR CUTLASS_SYCL_BUILTIN_ENABLE) - list(APPEND DPCPP_FLAGS "-Xspirv-translator;-spirv-ext=+SPV_INTEL_split_barrier") + list(APPEND DPCPP_LINK_ONLY_FLAGS "-Xspirv-translator;-spirv-ext=+SPV_INTEL_split_barrier") else() - list(APPEND DPCPP_FLAGS "-Xspirv-translator;-spirv-ext=+SPV_INTEL_split_barrier,+SPV_INTEL_2d_block_io,+SPV_INTEL_subgroup_matrix_multiply_accumulate") - endif() + list(APPEND DPCPP_LINK_ONLY_FLAGS "-Xspirv-translator;-spirv-ext=+SPV_INTEL_split_barrier,+SPV_INTEL_2d_block_io,+SPV_INTEL_subgroup_matrix_multiply_accumulate") + endif() if(DPCPP_DISABLE_ITT_FOR_CUTLASS) list(APPEND DPCPP_FLAGS "-fno-sycl-instrument-device-code") endif() @@ -76,14 +77,16 @@ endif() if(UNIX) set_target_properties(DPCPP::DPCPP PROPERTIES INTERFACE_COMPILE_OPTIONS "${DPCPP_FLAGS};${DPCPP_COMPILE_ONLY_FLAGS}" - INTERFACE_LINK_OPTIONS "${DPCPP_FLAGS}" + INTERFACE_LINK_OPTIONS "${DPCPP_FLAGS};${DPCPP_LINK_ONLY_FLAGS}" INTERFACE_LINK_LIBRARIES ${DPCPP_LIB_DIR} INTERFACE_INCLUDE_DIRECTORIES "${DPCPP_BIN_DIR}/../include/sycl;${DPCPP_BIN_DIR}/../include") message(STATUS "DPCPP INCLUDE DIR: ${DPCPP_BIN_DIR}/../include/sycl;${DPCPP_BIN_DIR}/../include") - message(STATUS "Using DPCPP flags: ${DPCPP_FLAGS};${DPCPP_COMPILE_ONLY_FLAGS}") + message(STATUS "Using DPCPP compile flags: ${DPCPP_FLAGS};${DPCPP_COMPILE_ONLY_FLAGS}") + message(STATUS "Using DPCPP link flags: ${DPCPP_FLAGS};${DPCPP_LINK_ONLY_FLAGS}") else() set_target_properties(DPCPP::DPCPP PROPERTIES INTERFACE_COMPILE_OPTIONS "${DPCPP_FLAGS};${DPCPP_COMPILE_ONLY_FLAGS}" + INTERFACE_LINK_OPTIONS "${DPCPP_FLAGS};${DPCPP_LINK_ONLY_FLAGS}" INTERFACE_LINK_LIBRARIES ${DPCPP_LIB_DIR} INTERFACE_INCLUDE_DIRECTORIES "${DPCPP_BIN_DIR}/../include/sycl") endif() @@ -105,7 +108,7 @@ function(add_sycl_to_target) ) get_target_property(target_type ${CUTLASS_ADD_SYCL_TARGET} TYPE) if (NOT target_type STREQUAL "OBJECT_LIBRARY") - target_link_options(${CUTLASS_ADD_SYCL_TARGET} PUBLIC ${DPCPP_FLAGS}) + target_link_options(${CUTLASS_ADD_SYCL_TARGET} PUBLIC ${DPCPP_FLAGS} ${DPCPP_LINK_ONLY_FLAGS}) endif() endfunction() diff --git a/cmake/googletest.cmake b/cmake/googletest.cmake index 5249b328fd..6912bef5d3 100644 --- a/cmake/googletest.cmake +++ b/cmake/googletest.cmake @@ -44,6 +44,15 @@ FetchContent_Declare( FetchContent_MakeAvailable(googletest) +if (CMAKE_CXX_COMPILER_ID STREQUAL "IntelLLVM") + if (TARGET gtest) + # Ignore unsupported warning flags on IntelLLVM + target_compile_options(gtest PRIVATE -Wno-unknown-warning-option) + # Show -Winline warnings, but don’t let them become errors + target_compile_options(gtest PRIVATE -Wno-error=inline) + endif() +endif() + if (MSVC) set(gtest_force_shared_crt ON CACHE BOOL "" FORCE) -endif() +endif() \ No newline at end of file diff --git a/examples/02_bmg_gemm_mixed_dtype/02_bmg_gemm_f16_u4_f16.cpp b/examples/02_bmg_gemm_mixed_dtype/02_bmg_gemm_f16_u4_f16.cpp index 5aa90672ae..c84658eec8 100755 --- a/examples/02_bmg_gemm_mixed_dtype/02_bmg_gemm_f16_u4_f16.cpp +++ b/examples/02_bmg_gemm_mixed_dtype/02_bmg_gemm_f16_u4_f16.cpp @@ -452,7 +452,11 @@ struct ExampleRunner { /// Initialize operands to be used in the GEMM and reference GEMM void initialize(Options const& options) { - auto [M, N, K, L] = ProblemShapeType{options.m, options.n, options.k, options.l}; + auto problem_shape = ProblemShapeType{options.m, options.n, options.k, options.l}; + auto& M = cute::get<0>(problem_shape); + auto& N = cute::get<1>(problem_shape); + auto& K = cute::get<2>(problem_shape); + auto& L = cute::get<3>(problem_shape); auto zero_elements_packed_along_k = get<0>(StrideZero{}); const int scale_k = cute::ceil_div(options.k, options.g); diff --git a/include/cutlass/gemm/collective/xe_mma_mixed_input.hpp b/include/cutlass/gemm/collective/xe_mma_mixed_input.hpp index 7901545b0b..2066d6747a 100644 --- a/include/cutlass/gemm/collective/xe_mma_mixed_input.hpp +++ b/include/cutlass/gemm/collective/xe_mma_mixed_input.hpp @@ -328,7 +328,7 @@ struct CollectiveMma< }(); if constexpr (ModeScale) { - return Params{tiled_copy_a, tiled_copy_b, tiled_copy_scale, {}, args.group_size}; + return Params{tiled_copy_a, tiled_copy_b, {tiled_copy_scale}, {}, args.group_size}; } else { auto ptr_Z = [&]() { if constexpr (sizeof_bits_v < 8) { @@ -353,7 +353,7 @@ struct CollectiveMma< } }(); - return Params{tiled_copy_a, tiled_copy_b, tiled_copy_scale, tiled_copy_zero, args.group_size}; + return Params{tiled_copy_a, tiled_copy_b, {tiled_copy_scale}, {tiled_copy_zero}, args.group_size}; } } } diff --git a/test/unit/cute/intel_xe/mma.cpp b/test/unit/cute/intel_xe/mma.cpp index 1c0e3d8a61..8bbd4bb157 100755 --- a/test/unit/cute/intel_xe/mma.cpp +++ b/test/unit/cute/intel_xe/mma.cpp @@ -89,6 +89,7 @@ void gemm_device(TA const *A, TB const *B, TC *C, uint32_t m, uint32_t n, #define CUTLASS_ENABLE_DEBUG_PRINTS (0) +#undef LOG_THREAD #define LOG_THREAD (16) #if CUTLASS_ENABLE_DEBUG_PRINTS diff --git a/test/unit/cute/intel_xe/utils.hpp b/test/unit/cute/intel_xe/utils.hpp index e109d9fe27..d4597912a3 100755 --- a/test/unit/cute/intel_xe/utils.hpp +++ b/test/unit/cute/intel_xe/utils.hpp @@ -52,6 +52,7 @@ using namespace syclcompat::experimental; #define CUTLASS_ENABLE_DEBUG_PRINTS (0) #define LOG_GROUP (0) +#undef LOG_THREAD #define LOG_THREAD (0) template