pass test

hipudding · hipudding · commit b2bc76c00686 · 2025-11-20T09:21:03.000Z
diff --git a/ggml/src/ggml-cann/aclnn_ops.cpp b/ggml/src/ggml-cann/aclnn_ops.cpp
@@ -2287,29 +2287,31 @@ static void aclnn_rope_cache_init(ggml_backend_cann_context & ctx,
     // Step1.1: prepare theta_scale exponent. if this this exponent updated, should update theta_scale_tensor.
     acl_tensor_ptr acl_theta_scale_tensor;
     bool theta_scale_updated = false;
-    if (ctx.rope_cache.theta_scale_length != theta_scale_length || ctx.rope_cache.indep_sects != indep_sects) {
+    if (ctx.rope_cache.theta_scale_length != theta_scale_length || ctx.rope_cache.theta_scale != theta_scale  || ctx.rope_cache.indep_sects != indep_sects) {
         theta_scale_updated = true;
         if (ctx.rope_cache.theta_scale_exp_host != nullptr) {
             free(ctx.rope_cache.theta_scale_exp_host);
         }
         ctx.rope_cache.theta_scale_exp_host = (float *) malloc(theta_scale_length * sizeof(float));
 
         if (!indep_sects) {
-            for (int i = 0; i < theta_scale_length; i++) {
-                ctx.rope_cache.theta_scale_exp_host[i] = i;
+            ctx.rope_cache.theta_scale_exp_host[0] = 1;
+            for (int i = 1; i < theta_scale_length; i++) {
+                ctx.rope_cache.theta_scale_exp_host[i] = ctx.rope_cache.theta_scale_exp_host[i-1] * theta_scale;
             }
         } else {
             int sect_dims = sections[0] + sections[1] + sections[2] + sections[3];
             int sec_w     = sections[1] + sections[0];
             int sec_e     = sections[2] + sec_w;
-            int exp       = 0;
-            for (int i = 0; i < theta_scale_length; i++) {
+
+            ctx.rope_cache.theta_scale_exp_host[0] = 1;
+            for (int i = 1; i < theta_scale_length; i++) {
                 int sector = i % sect_dims;
                 if (sector == 0 || sector == sections[0] || sector == sec_w || sector == sec_e) {
+                    ctx.rope_cache.theta_scale_exp_host[i] = 1;
+                    continue;
                 }
-                exp = 0;
-                ctx.rope_cache.theta_scale_exp_host[i] = exp;
-                exp++;
+                ctx.rope_cache.theta_scale_exp_host[i] = ctx.rope_cache.theta_scale_exp_host[i-1] * theta_scale;
             }
         }
 
@@ -2329,15 +2331,7 @@ static void aclnn_rope_cache_init(ggml_backend_cann_context & ctx,
 
     // Step1.2: prepare theta_scale_tensor, if both theta_scale or theta_scale_tensor's exponent updated,
     // theta_scale_tensor should update.
-    if (ctx.rope_cache.theta_scale != theta_scale || theta_scale_updated) {
-        theta_scale_updated = true;
-        acl_scalar_ptr acl_theta_scale = ggml_cann_create_scalar(&theta_scale, aclDataType::ACL_FLOAT);
-        GGML_CANN_CALL_ACLNN_OP(ctx, PowScalarTensor, acl_theta_scale.get(), acl_theta_scale_tensor.get(),
-                                acl_theta_scale_tensor.get());
-        float res[64];
-        ACL_CHECK(aclrtSynchronizeStream(ctx.stream()));
-        ACL_CHECK(aclrtMemcpy(res, 64*4, ctx.rope_cache.theta_scale_cache, 64*4, ACL_MEMCPY_DEVICE_TO_HOST));
-    }
+
 
     // Step1.3: prepare rope_yarn_ramp, if this part updated, should update theta_scale_tensor.
     bool yarn_ramp_tensor_updated = false;
@@ -2410,7 +2404,8 @@ static void aclnn_rope_cache_init(ggml_backend_cann_context & ctx,
     if (mrope_used) {
         if (ctx.rope_cache.sections[0] != sections[0] || ctx.rope_cache.sections[1] != sections[1] ||
             ctx.rope_cache.sections[2] != sections[2] || ctx.rope_cache.sections[3] != sections[3] ||
-            ctx.rope_cache.theta_scale_length != theta_scale_length) {
+            ctx.rope_cache.theta_scale_length != theta_scale_length || 
+            ctx.rope_cache.is_imrope != is_imrope) {
             if (ctx.rope_cache.position_select_index_host != nullptr) {
                 free(ctx.rope_cache.position_select_index_host);
             }
@@ -2461,17 +2456,6 @@ static void aclnn_rope_cache_init(ggml_backend_cann_context & ctx,
                                                                sizeof(int), theta_scale_ne, theta_scale_nb, 1);
     }
 
-    ctx.rope_cache.indep_sects = indep_sects;
-    ctx.rope_cache.theta_scale_length = theta_scale_length;
-    ctx.rope_cache.freq_scale = freq_scale;
-    ctx.rope_cache.theta_scale = theta_scale;
-    ctx.rope_cache.ext_factor = ext_factor;
-    ctx.rope_cache.sections[0] = sections[0];
-    ctx.rope_cache.sections[1] = sections[1];
-    ctx.rope_cache.sections[2] = sections[2];
-    ctx.rope_cache.sections[3] = sections[3];
-
-
     ggml_cann_pool_alloc freq_fac_res_allocator(ctx.pool());
     // Step2: divide by freq_factors
     if (src2) {
@@ -2502,13 +2486,6 @@ static void aclnn_rope_cache_init(ggml_backend_cann_context & ctx,
                 ggml_cann_create_tensor(src1->data, ggml_cann_type_mapping(src1->type), ggml_type_size(src1->type), mrope_position_ne,
                                         mrope_position_nb, 2);
 
-        int res[128];
-        ACL_CHECK(aclrtSynchronizeStream(ctx.stream()));
-        ACL_CHECK(aclrtMemcpy(res, 8*4, src1->data, 8*4, ACL_MEMCPY_DEVICE_TO_HOST));
-
-        ACL_CHECK(aclrtSynchronizeStream(ctx.stream()));
-        ACL_CHECK(aclrtMemcpy(res, 64*4, ctx.rope_cache.position_select_index, 64*4, ACL_MEMCPY_DEVICE_TO_HOST));
-        
         // selected position tensor's shape is a transpose of cache tensor.
         int64_t selected_position_ne[]   = {position_length, theta_scale_length};
         size_t  selected_position_nb[]   = {sizeof(float), position_length * sizeof(float)};
@@ -2518,10 +2495,6 @@ static void aclnn_rope_cache_init(ggml_backend_cann_context & ctx,
                                         selected_position_nb, 2);
         GGML_CANN_CALL_ACLNN_OP(ctx, IndexSelect, mrope_position.get(), 0, position_select_index_tensor.get(), acl_position_tensor.get());
     
-
-        ACL_CHECK(aclrtSynchronizeStream(ctx.stream()));
-        ACL_CHECK(aclrtMemcpy(res, 128*4, mrope_position_buffer, 128*4, ACL_MEMCPY_DEVICE_TO_HOST));
-
         // transpose
         int64_t transposed_ne[]   = {position_length, 1, theta_scale_length, 1};
         size_t  transposed_nb[GGML_MAX_DIMS];
@@ -2551,10 +2524,6 @@ static void aclnn_rope_cache_init(ggml_backend_cann_context & ctx,
         ggml_cann_create_tensor(theta_buffer, ACL_FLOAT, sizeof(float), cache_ne, cache_nb, GGML_MAX_DIMS);
     aclnn_mul(ctx, acl_position_tensor.get(), acl_theta_scale_tensor.get(), acl_theta_tensor.get());
 
-    float res[128];
-    ACL_CHECK(aclrtSynchronizeStream(ctx.stream()));
-    ACL_CHECK(aclrtMemcpy(res, 128*4, theta_buffer, 128*4, ACL_MEMCPY_DEVICE_TO_HOST));
-
     // Step4: calculate sin cos.
     // init sin_repeat && cos_repeat, only to accelerate first layer on each device
     if (position_length > ctx.rope_cache.position_length) {
@@ -2621,9 +2590,16 @@ static void aclnn_rope_cache_init(ggml_backend_cann_context & ctx,
 
     // Other layers use cache except first layer.
     ctx.rope_cache.cached      = true;
-    ctx.rope_cache.ext_factor  = ext_factor;
+    ctx.rope_cache.indep_sects = indep_sects;
+    ctx.rope_cache.theta_scale_length = theta_scale_length;
+    ctx.rope_cache.freq_scale = freq_scale;
     ctx.rope_cache.theta_scale = theta_scale;
-    ctx.rope_cache.freq_scale  = freq_scale;
+    ctx.rope_cache.ext_factor = ext_factor;
+    ctx.rope_cache.is_imrope  = is_imrope;
+    ctx.rope_cache.sections[0] = sections[0];
+    ctx.rope_cache.sections[1] = sections[1];
+    ctx.rope_cache.sections[2] = sections[2];
+    ctx.rope_cache.sections[3] = sections[3];
     ctx.rope_cache.attn_factor = attn_factor;
     ctx.rope_cache.is_neox     = is_neox;
 }
@@ -2677,11 +2653,15 @@ void ggml_cann_rope(ggml_backend_cann_context & ctx, ggml_tensor * dst) {
     float corr_dims[2];
     ggml_rope_yarn_corr_dims(n_dims, n_ctx_orig, freq_base, beta_fast, beta_slow, corr_dims);
 
-    const bool is_neox = mode & GGML_ROPE_TYPE_NEOX;
+    bool is_neox = mode & GGML_ROPE_TYPE_NEOX;
     const bool is_imrope = mode == GGML_ROPE_TYPE_IMROPE; // qwen3vl apply interleaved mrope
     const bool mrope_used = mode & GGML_ROPE_TYPE_MROPE;  // ggml_rope_multi, note: also true for vision (24 & 8 == true) and for imrope
     const bool is_vision = mode == GGML_ROPE_TYPE_VISION;
 
+    if (is_imrope || mrope_used) {
+        is_neox = true;
+    }
+
     // init ctx.rope_cos/rope_sin cache
     aclnn_rope_cache_init(ctx, dst, corr_dims, ext_factor, theta_scale, freq_scale, attn_factor, is_neox, sections, mrope_used, is_imrope, is_vision);
 
diff --git a/ggml/src/ggml-cann/common.h b/ggml/src/ggml-cann/common.h
@@ -338,6 +338,7 @@ struct ggml_cann_rope_cache {
     bool    is_neox            = false;
     bool    indep_sects        = false;
     int     sections[4]        = {0,0,0,0};
+    bool    is_imrope          = false;
 };
 
 struct ggml_cann_tensor_cache {
diff --git a/ggml/src/ggml-cann/ggml-cann.cpp b/ggml/src/ggml-cann/ggml-cann.cpp
@@ -2477,6 +2477,11 @@ static bool ggml_backend_cann_supports_op(ggml_backend_dev_t dev, const ggml_ten
                     return false;
                 }
 
+                // const int mode = ((const int32_t *) op->op_params)[2];
+                // if (mode & GGML_ROPE_TYPE_MROPE) {
+                //     return false;
+                // }
+
                 if (op->src[0]->ne[0] > 896) {
                     return false;
                 }

Original file line number	Diff line number	Diff line change
`@@ -2477,6 +2477,11 @@ static bool ggml_backend_cann_supports_op(ggml_backend_dev_t dev, const ggml_ten`
`2477`	`2477`	`return false;`
`2478`	`2478`	`}`
`2479`	`2479`
	`2480`	`+ // const int mode = ((const int32_t *) op->op_params)[2];`
	`2481`	`+ // if (mode & GGML_ROPE_TYPE_MROPE) {`
	`2482`	`+ // return false;`
	`2483`	`+ // }`
	`2484`	`+`
`2480`	`2485`	`if (op->src[0]->ne[0] > 896) {`
`2481`	`2486`	`return false;`
`2482`	`2487`	`}`