From d8eba3cebbf33d834a7ba7f425f4ccabb0731069 Mon Sep 17 00:00:00 2001 From: Aidan Date: Wed, 14 Feb 2024 17:31:09 +0000 Subject: [PATCH 1/6] Update ggml_sycl_op_mul_mat_vec_q --- ggml-sycl.cpp | 252 +++++++++++++------------------------------------- 1 file changed, 64 insertions(+), 188 deletions(-) diff --git a/ggml-sycl.cpp b/ggml-sycl.cpp index cd4b3a1e169c9..ca4d95b26c42e 100644 --- a/ggml-sycl.cpp +++ b/ggml-sycl.cpp @@ -9188,174 +9188,22 @@ static void convert_mul_mat_vec_f16_sycl(const void *vx, const dfloat *y, } } -static void mul_mat_vec_q4_0_q8_1_sycl(const void *vx, const void *vy, - float *dst, const int ncols, - const int nrows, - dpct::queue_ptr stream) { - GGML_ASSERT(ncols % QK4_0 == 0); - const int block_num_y = (nrows + GGML_SYCL_MMV_Y - 1) / GGML_SYCL_MMV_Y; - const sycl::range<3> block_nums(1, 1, block_num_y); - const sycl::range<3> block_dims(1, GGML_SYCL_MMV_Y, WARP_SIZE); - stream->parallel_for( - sycl::nd_range<3>(block_nums * block_dims, block_dims), - [=](sycl::nd_item<3> item_ct1) [[intel::reqd_sub_group_size(32)]] { - mul_mat_vec_q(vx, vy, dst, ncols, nrows, - item_ct1); - }); -} - -static void mul_mat_vec_q4_1_q8_1_sycl(const void *vx, const void *vy, - float *dst, const int ncols, - const int nrows, - dpct::queue_ptr stream) { - GGML_ASSERT(ncols % QK4_1 == 0); - const int block_num_y = (nrows + GGML_SYCL_MMV_Y - 1) / GGML_SYCL_MMV_Y; - const sycl::range<3> block_nums(1, 1, block_num_y); - const sycl::range<3> block_dims(1, GGML_SYCL_MMV_Y, WARP_SIZE); - stream->parallel_for( - sycl::nd_range<3>(block_nums * block_dims, block_dims), - [=](sycl::nd_item<3> item_ct1) [[intel::reqd_sub_group_size(32)]] { - mul_mat_vec_q(vx, vy, dst, ncols, nrows, - item_ct1); - }); -} - -static void mul_mat_vec_q5_0_q8_1_sycl(const void *vx, const void *vy, - float *dst, const int ncols, - const int nrows, - dpct::queue_ptr stream) { - GGML_ASSERT(ncols % QK5_0 == 0); - const int block_num_y = (nrows + GGML_SYCL_MMV_Y - 1) / GGML_SYCL_MMV_Y; - const sycl::range<3> block_nums(1, 1, block_num_y); - const sycl::range<3> block_dims(1, GGML_SYCL_MMV_Y, WARP_SIZE); - stream->parallel_for( - sycl::nd_range<3>(block_nums * block_dims, block_dims), - [=](sycl::nd_item<3> item_ct1) [[intel::reqd_sub_group_size(32)]] { - mul_mat_vec_q(vx, vy, dst, ncols, nrows, - item_ct1); - }); -} - -static void mul_mat_vec_q5_1_q8_1_sycl(const void *vx, const void *vy, - float *dst, const int ncols, - const int nrows, - dpct::queue_ptr stream) { - GGML_ASSERT(ncols % QK5_1 == 0); - const int block_num_y = (nrows + GGML_SYCL_MMV_Y - 1) / GGML_SYCL_MMV_Y; - const sycl::range<3> block_nums(1, 1, block_num_y); - const sycl::range<3> block_dims(1, GGML_SYCL_MMV_Y, WARP_SIZE); - stream->parallel_for( - sycl::nd_range<3>(block_nums * block_dims, block_dims), - [=](sycl::nd_item<3> item_ct1) [[intel::reqd_sub_group_size(32)]] { - mul_mat_vec_q(vx, vy, dst, ncols, nrows, - item_ct1); - }); -} - -static void mul_mat_vec_q8_0_q8_1_sycl(const void *vx, const void *vy, - float *dst, const int ncols, - const int nrows, - dpct::queue_ptr stream) { - GGML_ASSERT(ncols % QK8_0 == 0); - const int block_num_y = (nrows + GGML_SYCL_MMV_Y - 1) / GGML_SYCL_MMV_Y; - const sycl::range<3> block_nums(1, 1, block_num_y); - const sycl::range<3> block_dims(1, GGML_SYCL_MMV_Y, WARP_SIZE); - stream->parallel_for( - sycl::nd_range<3>(block_nums * block_dims, block_dims), - [=](sycl::nd_item<3> item_ct1) [[intel::reqd_sub_group_size(32)]] { - mul_mat_vec_q(vx, vy, dst, ncols, nrows, - item_ct1); - }); -} - -static void mul_mat_vec_q2_K_q8_1_sycl(const void *vx, const void *vy, - float *dst, const int ncols, - const int nrows, - dpct::queue_ptr stream) { - GGML_ASSERT(ncols % QK_K == 0); - const int block_num_y = (nrows + GGML_SYCL_MMV_Y - 1) / GGML_SYCL_MMV_Y; - const sycl::range<3> block_nums(1, 1, block_num_y); - const sycl::range<3> block_dims(1, GGML_SYCL_MMV_Y, WARP_SIZE); - stream->parallel_for( - sycl::nd_range<3>(block_nums * block_dims, block_dims), - [=](sycl::nd_item<3> item_ct1) [[intel::reqd_sub_group_size(32)]] { - mul_mat_vec_q(vx, vy, dst, ncols, nrows, - item_ct1); - }); -} - -static void mul_mat_vec_q3_K_q8_1_sycl(const void *vx, const void *vy, - float *dst, const int ncols, - const int nrows, - dpct::queue_ptr stream) { - GGML_ASSERT(ncols % QK_K == 0); - const int block_num_y = (nrows + GGML_SYCL_MMV_Y - 1) / GGML_SYCL_MMV_Y; - const sycl::range<3> block_nums(1, 1, block_num_y); - const sycl::range<3> block_dims(1, GGML_SYCL_MMV_Y, WARP_SIZE); - stream->parallel_for( - sycl::nd_range<3>(block_nums * block_dims, block_dims), - [=](sycl::nd_item<3> item_ct1) [[intel::reqd_sub_group_size(32)]] { - mul_mat_vec_q(vx, vy, dst, ncols, nrows, - item_ct1); - }); -} - -static void mul_mat_vec_q4_K_q8_1_sycl(const void *vx, const void *vy, - float *dst, const int ncols, - const int nrows, - dpct::queue_ptr stream) { - GGML_ASSERT(ncols % QK_K == 0); - const int block_num_y = (nrows + GGML_SYCL_MMV_Y - 1) / GGML_SYCL_MMV_Y; - const sycl::range<3> block_nums(1, 1, block_num_y); - const sycl::range<3> block_dims(1, GGML_SYCL_MMV_Y, WARP_SIZE); - stream->parallel_for( - sycl::nd_range<3>(block_nums * block_dims, block_dims), - [=](sycl::nd_item<3> item_ct1) [[intel::reqd_sub_group_size(32)]] { - mul_mat_vec_q(vx, vy, dst, ncols, nrows, - item_ct1); - }); -} - -static void mul_mat_vec_q5_K_q8_1_sycl(const void *vx, const void *vy, - float *dst, const int ncols, - const int nrows, - dpct::queue_ptr stream) { - GGML_ASSERT(ncols % QK_K == 0); - const int block_num_y = (nrows + GGML_SYCL_MMV_Y - 1) / GGML_SYCL_MMV_Y; - const sycl::range<3> block_nums(1, 1, block_num_y); - const sycl::range<3> block_dims(1, GGML_SYCL_MMV_Y, WARP_SIZE); - stream->parallel_for( - sycl::nd_range<3>(block_nums * block_dims, block_dims), - [=](sycl::nd_item<3> item_ct1) [[intel::reqd_sub_group_size(32)]] { - mul_mat_vec_q(vx, vy, dst, ncols, nrows, - item_ct1); - }); -} - -static void mul_mat_vec_q6_K_q8_1_sycl(const void *vx, const void *vy, - float *dst, const int ncols, - const int nrows, - dpct::queue_ptr stream) { - GGML_ASSERT(ncols % QK_K == 0); - const int block_num_y = (nrows + GGML_SYCL_MMV_Y - 1) / GGML_SYCL_MMV_Y; - const sycl::range<3> block_nums(1, 1, block_num_y); - const sycl::range<3> block_dims(1, GGML_SYCL_MMV_Y, WARP_SIZE); - stream->parallel_for( - sycl::nd_range<3>(block_nums * block_dims, block_dims), - [=](sycl::nd_item<3> item_ct1) [[intel::reqd_sub_group_size(32)]] { - mul_mat_vec_q(vx, vy, dst, ncols, nrows, - item_ct1); - }); +template +static void mul_mat_vec_q_sycl_submitter(const void *vx, const void *vy, + float *dst, const int ncols, + const int nrows, + dpct::queue_ptr stream) { + GGML_ASSERT(ncols % QK4_0 == 0); + const int block_num_y = (nrows + GGML_SYCL_MMV_Y - 1) / GGML_SYCL_MMV_Y; + const sycl::range<3> block_nums(1, 1, block_num_y); + const sycl::range<3> block_dims(1, GGML_SYCL_MMV_Y, WARP_SIZE); + stream->parallel_for( + sycl::nd_range<3>(block_nums * block_dims, block_dims), [= + ](sycl::nd_item<3> item_ct1) [[intel::reqd_sub_group_size(32)]] { + mul_mat_vec_q( + vx, vy, dst, ncols, nrows, item_ct1); + }); } int get_device_index_by_id(int id){ @@ -12095,37 +11943,62 @@ inline void ggml_sycl_op_mul_mat_vec_q( const int64_t ne00 = src0->ne[0]; const int64_t row_diff = row_high - row_low; + // TODO: support these quantization types + GGML_ASSERT(!(src0->type == GGML_TYPE_IQ2_XXS || + src0->type == GGML_TYPE_IQ2_XS || + src0->type == GGML_TYPE_IQ3_XXS)); + switch (src0->type) { case GGML_TYPE_Q4_0: - mul_mat_vec_q4_0_q8_1_sycl(src0_dd_i, src1_ddq_i, dst_dd_i, ne00, row_diff, stream); - break; + mul_mat_vec_q_sycl_submitter( + src0_dd_i, src1_ddq_i, dst_dd_i, ne00, row_diff, stream); + break; case GGML_TYPE_Q4_1: - mul_mat_vec_q4_1_q8_1_sycl(src0_dd_i, src1_ddq_i, dst_dd_i, ne00, row_diff, stream); - break; + mul_mat_vec_q_sycl_submitter( + src0_dd_i, src1_ddq_i, dst_dd_i, ne00, row_diff, stream); + break; case GGML_TYPE_Q5_0: - mul_mat_vec_q5_0_q8_1_sycl(src0_dd_i, src1_ddq_i, dst_dd_i, ne00, row_diff, stream); - break; + mul_mat_vec_q_sycl_submitter( + src0_dd_i, src1_ddq_i, dst_dd_i, ne00, row_diff, stream); + break; case GGML_TYPE_Q5_1: - mul_mat_vec_q5_1_q8_1_sycl(src0_dd_i, src1_ddq_i, dst_dd_i, ne00, row_diff, stream); - break; + mul_mat_vec_q_sycl_submitter( + src0_dd_i, src1_ddq_i, dst_dd_i, ne00, row_diff, stream); + break; case GGML_TYPE_Q8_0: - mul_mat_vec_q8_0_q8_1_sycl(src0_dd_i, src1_ddq_i, dst_dd_i, ne00, row_diff, stream); - break; + mul_mat_vec_q_sycl_submitter( + src0_dd_i, src1_ddq_i, dst_dd_i, ne00, row_diff, stream); + break; case GGML_TYPE_Q2_K: - mul_mat_vec_q2_K_q8_1_sycl(src0_dd_i, src1_ddq_i, dst_dd_i, ne00, row_diff, stream); - break; + mul_mat_vec_q_sycl_submitter( + src0_dd_i, src1_ddq_i, dst_dd_i, ne00, row_diff, stream); + break; case GGML_TYPE_Q3_K: - mul_mat_vec_q3_K_q8_1_sycl(src0_dd_i, src1_ddq_i, dst_dd_i, ne00, row_diff, stream); - break; + mul_mat_vec_q_sycl_submitter( + src0_dd_i, src1_ddq_i, dst_dd_i, ne00, row_diff, stream); + break; case GGML_TYPE_Q4_K: - mul_mat_vec_q4_K_q8_1_sycl(src0_dd_i, src1_ddq_i, dst_dd_i, ne00, row_diff, stream); - break; + mul_mat_vec_q_sycl_submitter( + src0_dd_i, src1_ddq_i, dst_dd_i, ne00, row_diff, stream); + break; case GGML_TYPE_Q5_K: - mul_mat_vec_q5_K_q8_1_sycl(src0_dd_i, src1_ddq_i, dst_dd_i, ne00, row_diff, stream); - break; + mul_mat_vec_q_sycl_submitter( + src0_dd_i, src1_ddq_i, dst_dd_i, ne00, row_diff, stream); + break; case GGML_TYPE_Q6_K: - mul_mat_vec_q6_K_q8_1_sycl(src0_dd_i, src1_ddq_i, dst_dd_i, ne00, row_diff, stream); - break; + mul_mat_vec_q_sycl_submitter( + src0_dd_i, src1_ddq_i, dst_dd_i, ne00, row_diff, stream); + break; default: GGML_ASSERT(false); break; @@ -15093,6 +14966,9 @@ static bool ggml_backend_sycl_supports_op(ggml_backend_t backend, const ggml_ten return false; } + if (a->type == GGML_TYPE_IQ3_XXS) { + return false; + } if (a->type == GGML_TYPE_IQ2_XXS) { return false; } From f58d29b10475a2f91e64338798ca19dc3874cde6 Mon Sep 17 00:00:00 2001 From: AidanBeltonS <87009434+AidanBeltonS@users.noreply.github.com> Date: Thu, 15 Feb 2024 12:09:29 +0000 Subject: [PATCH 2/6] Apply suggestions from code review Co-authored-by: Abhilash Majumder <30946547+abhilash1910@users.noreply.github.com> --- ggml-sycl.cpp | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/ggml-sycl.cpp b/ggml-sycl.cpp index ca4d95b26c42e..869ef9d15e4ab 100644 --- a/ggml-sycl.cpp +++ b/ggml-sycl.cpp @@ -11975,27 +11975,27 @@ inline void ggml_sycl_op_mul_mat_vec_q( src0_dd_i, src1_ddq_i, dst_dd_i, ne00, row_diff, stream); break; case GGML_TYPE_Q2_K: - mul_mat_vec_q_sycl_submitter( src0_dd_i, src1_ddq_i, dst_dd_i, ne00, row_diff, stream); break; case GGML_TYPE_Q3_K: - mul_mat_vec_q_sycl_submitter( src0_dd_i, src1_ddq_i, dst_dd_i, ne00, row_diff, stream); break; case GGML_TYPE_Q4_K: - mul_mat_vec_q_sycl_submitter( src0_dd_i, src1_ddq_i, dst_dd_i, ne00, row_diff, stream); break; case GGML_TYPE_Q5_K: - mul_mat_vec_q_sycl_submitter( src0_dd_i, src1_ddq_i, dst_dd_i, ne00, row_diff, stream); break; case GGML_TYPE_Q6_K: - mul_mat_vec_q_sycl_submitter( src0_dd_i, src1_ddq_i, dst_dd_i, ne00, row_diff, stream); break; From 31bc422b554d16271185b369fc8c525a04cb9965 Mon Sep 17 00:00:00 2001 From: Abhilash Majumder <30946547+abhilash1910@users.noreply.github.com> Date: Fri, 16 Feb 2024 09:03:54 +0530 Subject: [PATCH 3/6] revert suggestion on macro --- ggml-sycl.cpp | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/ggml-sycl.cpp b/ggml-sycl.cpp index 869ef9d15e4ab..ca4d95b26c42e 100644 --- a/ggml-sycl.cpp +++ b/ggml-sycl.cpp @@ -11975,27 +11975,27 @@ inline void ggml_sycl_op_mul_mat_vec_q( src0_dd_i, src1_ddq_i, dst_dd_i, ne00, row_diff, stream); break; case GGML_TYPE_Q2_K: - mul_mat_vec_q_sycl_submitter( src0_dd_i, src1_ddq_i, dst_dd_i, ne00, row_diff, stream); break; case GGML_TYPE_Q3_K: - mul_mat_vec_q_sycl_submitter( src0_dd_i, src1_ddq_i, dst_dd_i, ne00, row_diff, stream); break; case GGML_TYPE_Q4_K: - mul_mat_vec_q_sycl_submitter( src0_dd_i, src1_ddq_i, dst_dd_i, ne00, row_diff, stream); break; case GGML_TYPE_Q5_K: - mul_mat_vec_q_sycl_submitter( src0_dd_i, src1_ddq_i, dst_dd_i, ne00, row_diff, stream); break; case GGML_TYPE_Q6_K: - mul_mat_vec_q_sycl_submitter( src0_dd_i, src1_ddq_i, dst_dd_i, ne00, row_diff, stream); break; From 5d7d35370708a05aaf38836443332b0848b450a2 Mon Sep 17 00:00:00 2001 From: Abhilash Majumder <30946547+abhilash1910@users.noreply.github.com> Date: Fri, 16 Feb 2024 09:16:50 +0530 Subject: [PATCH 4/6] fix bug --- ggml-sycl.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ggml-sycl.cpp b/ggml-sycl.cpp index ca4d95b26c42e..caffe9b02333b 100644 --- a/ggml-sycl.cpp +++ b/ggml-sycl.cpp @@ -12018,7 +12018,7 @@ inline void ggml_sycl_op_dequantize_mul_mat_vec( const int64_t src1_ncols, const int64_t src1_padded_row_size, const dpct::queue_ptr &stream) { - GGML_TENSOR_BINARY_OP_LOCALS + GGML_TENSOR_BINARY_OP_LOCALS; const int64_t row_diff = row_high - row_low; From 8388e73b72e1b42be746ab3ad9cadafb60182410 Mon Sep 17 00:00:00 2001 From: Aidan Date: Mon, 19 Feb 2024 16:51:06 +0000 Subject: [PATCH 5/6] Add quant type GGML_TYPE_IQ1_S to unsupported --- ggml-sycl.cpp | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/ggml-sycl.cpp b/ggml-sycl.cpp index caffe9b02333b..5c88036534c2a 100644 --- a/ggml-sycl.cpp +++ b/ggml-sycl.cpp @@ -11946,7 +11946,8 @@ inline void ggml_sycl_op_mul_mat_vec_q( // TODO: support these quantization types GGML_ASSERT(!(src0->type == GGML_TYPE_IQ2_XXS || src0->type == GGML_TYPE_IQ2_XS || - src0->type == GGML_TYPE_IQ3_XXS)); + src0->type == GGML_TYPE_IQ3_XXS || + src0->type == GGML_TYPE_IQ1_S)); switch (src0->type) { case GGML_TYPE_Q4_0: @@ -14965,7 +14966,10 @@ static bool ggml_backend_sycl_supports_op(ggml_backend_t backend, const ggml_ten if (a->ne[3] != b->ne[3]) { return false; } - + + if (a->type == GGML_TYPE_IQ1_S) { + return false; + } if (a->type == GGML_TYPE_IQ3_XXS) { return false; } From 6f139980d4ed1034831dc7b6e773a0f8d839d834 Mon Sep 17 00:00:00 2001 From: Abhilash Majumder <30946547+abhilash1910@users.noreply.github.com> Date: Tue, 20 Feb 2024 10:56:21 +0530 Subject: [PATCH 6/6] fix format --- ggml-sycl.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ggml-sycl.cpp b/ggml-sycl.cpp index 5c88036534c2a..df182611241b0 100644 --- a/ggml-sycl.cpp +++ b/ggml-sycl.cpp @@ -14966,7 +14966,7 @@ static bool ggml_backend_sycl_supports_op(ggml_backend_t backend, const ggml_ten if (a->ne[3] != b->ne[3]) { return false; } - + if (a->type == GGML_TYPE_IQ1_S) { return false; }