RISC-V: Move RVV V_REGS liveness computation into analyze_loop_vinfo

Currently, we compute RVV V_REGS liveness during better_main_loop_than_p which is not appropriate time to do that since we for example, when have the codes will finally pick LMUL = 8 vectorization factor, we compute liveness for LMUL = 8 multiple times which are redundant. Since we have leverage the current ARM SVE COST model: /* Do one-time initialization based on the vinfo. */ loop_vec_info loop_vinfo = dyn_cast<loop_vec_info> (m_vinfo); if (!m_analyzed_vinfo) { if (loop_vinfo) analyze_loop_vinfo (loop_vinfo); m_analyzed_vinfo = true; } Analyze COST model only once for each cost model. So here we move dynamic LMUL liveness information into analyze_loop_vinfo. /* Do one-time initialization of the costs given that we're costing the loop vectorization described by LOOP_VINFO. */ void costs::analyze_loop_vinfo (loop_vec_info loop_vinfo) { ... /* Detect whether the LOOP has unexpected spills. */ record_potential_unexpected_spills (loop_vinfo); } So that we can avoid redundant computations and the current dynamic LMUL cost model flow is much more reasonable and consistent with others. Tested on RV32 and RV64 no regressions. gcc/ChangeLog: * config/riscv/riscv-vector-costs.cc (compute_estimated_lmul): Allow fractional vecrtor. (preferred_new_lmul_p): Move RVV V_REGS liveness computation into analyze_loop_vinfo. (has_unexpected_spills_p): New function. (costs::record_potential_unexpected_spills): Ditto. (costs::better_main_loop_than_p): Move RVV V_REGS liveness computation into analyze_loop_vinfo. * config/riscv/riscv-vector-costs.h: New functions and variables. gcc/testsuite/ChangeLog: * gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul-mixed-1.c: Robostify test. * gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul1-1.c: Ditto. * gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul1-2.c: Ditto. * gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul1-3.c: Ditto. * gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul1-4.c: Ditto. * gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul1-5.c: Ditto. * gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul1-6.c: Ditto. * gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul1-7.c: Ditto. * gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul2-1.c: Ditto. * gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul2-2.c: Ditto. * gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul2-3.c: Ditto. * gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul2-4.c: Ditto. * gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul2-5.c: Ditto. * gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul2-6.c: Ditto. * gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul4-1.c: Ditto. * gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul4-10.c: Ditto. * gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul4-2.c: Ditto. * gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul4-3.c: Ditto. * gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul4-5.c: Ditto. * gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul4-6.c: Ditto. * gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul4-7.c: Ditto. * gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul4-8.c: Ditto. * gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul8-1.c: Ditto. * gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul8-10.c: Ditto. * gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul8-11.c: Ditto. * gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul8-2.c: Ditto. * gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul8-3.c: Ditto. * gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul8-4.c: Ditto. * gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul8-5.c: Ditto. * gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul8-6.c: Ditto. * gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul8-7.c: Ditto. * gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul8-8.c: Ditto. * gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul8-9.c: Ditto. * gcc.dg/vect/costmodel/riscv/rvv/no-dynamic-lmul-1.c: Ditto. * gcc.dg/vect/costmodel/riscv/rvv/pr111848.c: Ditto. * gcc.dg/vect/costmodel/riscv/rvv/pr113112-1.c: Ditto.
gcc-mirror · Dec 25, 2023 · ed60b28 · ed60b28
1 parent fd032cc
commit ed60b28
Show file tree

Hide file tree

Showing 38 changed files with 113 additions and 184 deletions.
diff --git a/gcc/config/riscv/riscv-vector-costs.cc b/gcc/config/riscv/riscv-vector-costs.cc
@@ -390,14 +390,13 @@ non_contiguous_memory_access_p (stmt_vec_info stmt_info)
 
 /* Return the LMUL of the current analysis.  */
 static int
-compute_estimated_lmul (loop_vec_info other_loop_vinfo, machine_mode mode)
+compute_estimated_lmul (loop_vec_info loop_vinfo, machine_mode mode)
 {
   gcc_assert (GET_MODE_BITSIZE (mode).is_constant ());
-  int regno_alignment
-    = riscv_get_v_regno_alignment (other_loop_vinfo->vector_mode);
-  if (known_eq (LOOP_VINFO_SLP_UNROLLING_FACTOR (other_loop_vinfo), 1U))
+  int regno_alignment = riscv_get_v_regno_alignment (loop_vinfo->vector_mode);
+  if (known_eq (LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo), 1U))
     {
-      int estimated_vf = vect_vf_for_cost (other_loop_vinfo);
+      int estimated_vf = vect_vf_for_cost (loop_vinfo);
       return estimated_vf * GET_MODE_BITSIZE (mode).to_constant ()
 	     / TARGET_MIN_VLEN;
     }
@@ -407,12 +406,11 @@ compute_estimated_lmul (loop_vec_info other_loop_vinfo, machine_mode mode)
     {
       int ratio;
       if (can_div_trunc_p (BYTES_PER_RISCV_VECTOR,
-			   LOOP_VINFO_SLP_UNROLLING_FACTOR (other_loop_vinfo),
+			   LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo),
 			   &ratio))
 	return TARGET_MAX_LMUL / ratio;
-      else
-	gcc_unreachable ();
     }
+  return 0;
 }
 
 /* Update the live ranges according PHI.
@@ -576,49 +574,56 @@ update_local_live_ranges (
     }
 }
 
-/* Return true that the LMUL of new COST model is preferred.  */
+/* Compute the maximum live V_REGS.  */
 static bool
-preferred_new_lmul_p (loop_vec_info other_loop_vinfo)
+has_unexpected_spills_p (loop_vec_info loop_vinfo)
 {
+  /* We don't apply dynamic LMUL cost model on VLS modes.  */
+  if (!riscv_v_ext_vector_mode_p (loop_vinfo->vector_mode))
+    return false;
   /* Compute local program points.
      It's a fast and effective computation.  */
   hash_map<basic_block, vec<stmt_point>> program_points_per_bb;
-  compute_local_program_points (other_loop_vinfo, program_points_per_bb);
+  compute_local_program_points (loop_vinfo, program_points_per_bb);
 
   /* Compute local live ranges.  */
   hash_map<basic_block, hash_map<tree, pair>> live_ranges_per_bb;
   machine_mode biggest_mode
     = compute_local_live_ranges (program_points_per_bb, live_ranges_per_bb);
 
   /* Update live ranges according to PHI.  */
-  update_local_live_ranges (other_loop_vinfo, program_points_per_bb,
+  update_local_live_ranges (loop_vinfo, program_points_per_bb,
 			    live_ranges_per_bb, &biggest_mode);
 
-  int lmul = compute_estimated_lmul (other_loop_vinfo, biggest_mode);
+  int lmul = compute_estimated_lmul (loop_vinfo, biggest_mode);
   /* TODO: We calculate the maximum live vars base on current STMTS
      sequence.  We can support live range shrink if it can give us
      big improvement in the future.  */
-  if (!live_ranges_per_bb.is_empty ())
+  if (lmul > RVV_M1)
     {
-      unsigned int max_nregs = 0;
-      for (hash_map<basic_block, hash_map<tree, pair>>::iterator iter
-	   = live_ranges_per_bb.begin ();
-	   iter != live_ranges_per_bb.end (); ++iter)
+      if (!live_ranges_per_bb.is_empty ())
 	{
-	  basic_block bb = (*iter).first;
-	  unsigned int max_point
-	    = (*program_points_per_bb.get (bb)).length () + 1;
-	  if ((*iter).second.is_empty ())
-	    continue;
-	  /* We prefer larger LMUL unless it causes register spillings.  */
-	  unsigned int nregs
-	    = max_number_of_live_regs (bb, (*iter).second, max_point,
-				       biggest_mode, lmul);
-	  if (nregs > max_nregs)
-	    max_nregs = nregs;
+	  unsigned int max_nregs = 0;
+	  for (hash_map<basic_block, hash_map<tree, pair>>::iterator iter
+	       = live_ranges_per_bb.begin ();
+	       iter != live_ranges_per_bb.end (); ++iter)
+	    {
+	      basic_block bb = (*iter).first;
+	      unsigned int max_point
+		= (*program_points_per_bb.get (bb)).length () + 1;
+	      if ((*iter).second.is_empty ())
+		continue;
+	      /* We prefer larger LMUL unless it causes register spillings. */
+	      unsigned int nregs
+		= max_number_of_live_regs (bb, (*iter).second, max_point,
+					   biggest_mode, lmul);
+	      if (nregs > max_nregs)
+		max_nregs = nregs;
+	    }
+	  live_ranges_per_bb.empty ();
+	  if (max_nregs > V_REG_NUM)
+	    return true;
 	}
-      live_ranges_per_bb.empty ();
-      return max_nregs > V_REG_NUM;
     }
   if (!program_points_per_bb.is_empty ())
     {
@@ -632,7 +637,7 @@ preferred_new_lmul_p (loop_vec_info other_loop_vinfo)
 	}
       program_points_per_bb.empty ();
     }
-  return lmul > RVV_M1;
+  return false;
 }
 
 costs::costs (vec_info *vinfo, bool costing_for_scalar)
@@ -667,6 +672,25 @@ costs::analyze_loop_vinfo (loop_vec_info loop_vinfo)
   /* Detect whether we're vectorizing for VLA and should apply the unrolling
      heuristic described above m_unrolled_vls_niters.  */
   record_potential_vls_unrolling (loop_vinfo);
+
+  /* Detect whether the LOOP has unexpected spills.  */
+  record_potential_unexpected_spills (loop_vinfo);
+}
+
+/* Analyze the vectorized program stataments and use dynamic LMUL
+   heuristic to detect whether the loop has unexpected spills.  */
+void
+costs::record_potential_unexpected_spills (loop_vec_info loop_vinfo)
+{
+  if (riscv_autovec_lmul == RVV_DYNAMIC)
+    {
+      bool post_dom_available_p = dom_info_available_p (CDI_POST_DOMINATORS);
+      if (!post_dom_available_p)
+	calculate_dominance_info (CDI_POST_DOMINATORS);
+      m_has_unexpected_spills_p = has_unexpected_spills_p (loop_vinfo);
+      if (!post_dom_available_p)
+	free_dominance_info (CDI_POST_DOMINATORS);
+    }
 }
 
 /* Decide whether to use the unrolling heuristic described above
@@ -762,19 +786,19 @@ costs::better_main_loop_than_p (const vector_costs *uncast_other) const
 	  return other_prefer_unrolled;
 	}
     }
-
-  if (!LOOP_VINFO_NITERS_KNOWN_P (this_loop_vinfo)
-      && riscv_autovec_lmul == RVV_DYNAMIC)
+  else if (riscv_autovec_lmul == RVV_DYNAMIC
+	   && !LOOP_VINFO_NITERS_KNOWN_P (other_loop_vinfo))
     {
-      if (!riscv_v_ext_vector_mode_p (this_loop_vinfo->vector_mode))
+      if (other->m_has_unexpected_spills_p)
+	{
+	  if (dump_enabled_p ())
+	    dump_printf_loc (MSG_NOTE, vect_location,
+			     "Preferring smaller LMUL loop because"
+			     " it has unexpected spills\n");
+	  return true;
+	}
+      else
 	return false;
-      bool post_dom_available_p = dom_info_available_p (CDI_POST_DOMINATORS);
-      if (!post_dom_available_p)
-	calculate_dominance_info (CDI_POST_DOMINATORS);
-      bool preferred_p = preferred_new_lmul_p (other_loop_vinfo);
-      if (!post_dom_available_p)
-	free_dominance_info (CDI_POST_DOMINATORS);
-      return preferred_p;
     }
 
   return vector_costs::better_main_loop_than_p (other);

diff --git a/gcc/config/riscv/riscv-vector-costs.h b/gcc/config/riscv/riscv-vector-costs.h
@@ -92,6 +92,14 @@ class costs : public vector_costs
   void analyze_loop_vinfo (loop_vec_info);
   void record_potential_vls_unrolling (loop_vec_info);
   bool prefer_unrolled_loop () const;
+
+  /* Analyze the vectorized program statements and compute the maximum live
+     V_REGS live at some program point if we enable dynamic LMUL cost model.
+
+     It's true when LMUL of loop vectorization factor > 1 and has unexpected
+     V_REGS spills according to the analysis.  */
+  bool m_has_unexpected_spills_p = false;
+  void record_potential_unexpected_spills (loop_vec_info);
 };
 
 } // namespace riscv_vector

diff --git a/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul-mixed-1.c b/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul-mixed-1.c
@@ -44,7 +44,4 @@ foo (int32_t *__restrict a, int32_t *__restrict b, int32_t *__restrict c,
 /* { dg-final { scan-assembler {e32,m2} } } */
 /* { dg-final { scan-assembler {e32,m8} } } */
 /* { dg-final { scan-assembler-not {csrr} } } */
-/* { dg-final { scan-tree-dump "Maximum lmul = 8" "vect" } } */
-/* { dg-final { scan-tree-dump-times "Maximum lmul = 4" 1 "vect" } } */
-/* { dg-final { scan-tree-dump "Maximum lmul = 2" "vect" } } */
-/* { dg-final { scan-tree-dump-not "Maximum lmul = 1" "vect" } } */
+/* { dg-final { scan-tree-dump-times "Preferring smaller LMUL loop because it has unexpected spills" 2 "vect" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul1-1.c b/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul1-1.c
@@ -85,7 +85,4 @@ foo (int32_t *__restrict a, int32_t *__restrict b,    int32_t *__restrict c,
 
 /* { dg-final { scan-assembler {e32,m1} } } */
 /* { dg-final { scan-assembler-not {csrr} } } */
-/* { dg-final { scan-tree-dump-times "Maximum lmul = 8" 1 "vect" } } */
-/* { dg-final { scan-tree-dump-times "Maximum lmul = 4" 1 "vect" } } */
-/* { dg-final { scan-tree-dump-times "Maximum lmul = 2" 1 "vect" } } */
-/* { dg-final { scan-tree-dump-times "Maximum lmul = 1" 1 "vect" } } */
+/* { dg-final { scan-tree-dump-times "Preferring smaller LMUL loop because it has unexpected spills" 3 "vect" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul1-2.c b/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul1-2.c
@@ -57,7 +57,4 @@ foo (int32_t *__restrict a, int32_t *__restrict b,    int32_t *__restrict c,
 /* FIXME: Choosing LMUL = 1 is not the optimal since it can be LMUL = 2 if we apply instruction scheduler.  */
 /* { dg-final { scan-assembler {e32,m1} } } */
 /* { dg-final { scan-assembler-not {csrr} } } */
-/* { dg-final { scan-tree-dump-times "Maximum lmul = 8" 1 "vect" } } */
-/* { dg-final { scan-tree-dump-times "Maximum lmul = 4" 1 "vect" } } */
-/* { dg-final { scan-tree-dump-times "Maximum lmul = 2" 1 "vect" } } */
-/* { dg-final { scan-tree-dump-times "Maximum lmul = 1" 1 "vect" } } */
+/* { dg-final { scan-tree-dump-times "Preferring smaller LMUL loop because it has unexpected spills" 3 "vect" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul1-3.c b/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul1-3.c
@@ -85,7 +85,4 @@ foo (int8_t *__restrict a, int8_t *__restrict b,    int8_t *__restrict c,
 
 /* { dg-final { scan-assembler {e8,m1} } } */
 /* { dg-final { scan-assembler-not {csrr} } } */
-/* { dg-final { scan-tree-dump-times "Maximum lmul = 8" 1 "vect" } } */
-/* { dg-final { scan-tree-dump-times "Maximum lmul = 4" 1 "vect" } } */
-/* { dg-final { scan-tree-dump-times "Maximum lmul = 2" 1 "vect" } } */
-/* { dg-final { scan-tree-dump "Maximum lmul = 1" "vect" } } */
+/* { dg-final { scan-tree-dump-times "Preferring smaller LMUL loop because it has unexpected spills" 3 "vect" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul1-4.c b/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul1-4.c
@@ -115,7 +115,4 @@ foo (int32_t *__restrict a, int32_t *__restrict b,    int32_t *__restrict c,
 
 /* { dg-final { scan-assembler {e32,m1} } } */
 /* { dg-final { scan-assembler-not {csrr} } } */
-/* { dg-final { scan-tree-dump-times "Maximum lmul = 8" 1 "vect" } } */
-/* { dg-final { scan-tree-dump-times "Maximum lmul = 4" 1 "vect" } } */
-/* { dg-final { scan-tree-dump-times "Maximum lmul = 2" 1 "vect" } } */
-/* { dg-final { scan-tree-dump-times "Maximum lmul = 1" 1 "vect" } } */
+/* { dg-final { scan-tree-dump-times "Preferring smaller LMUL loop because it has unexpected spills" 3 "vect" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul1-5.c b/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul1-5.c
@@ -143,7 +143,4 @@ foo (int32_t *__restrict a, int32_t *__restrict b,    int32_t *__restrict c,
 
 /* { dg-final { scan-assembler {e32,m1} } } */
 /* { dg-final { scan-assembler-not {csrr} } } */
-/* { dg-final { scan-tree-dump-times "Maximum lmul = 8" 1 "vect" } } */
-/* { dg-final { scan-tree-dump-times "Maximum lmul = 4" 1 "vect" } } */
-/* { dg-final { scan-tree-dump-times "Maximum lmul = 2" 1 "vect" } } */
-/* { dg-final { scan-tree-dump-times "Maximum lmul = 1" 1 "vect" } } */
+/* { dg-final { scan-tree-dump-times "Preferring smaller LMUL loop because it has unexpected spills" 3 "vect" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul1-6.c b/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul1-6.c
@@ -144,7 +144,4 @@ foo (int8_t *__restrict a, int8_t *__restrict b,    int8_t *__restrict c,
 
 /* { dg-final { scan-assembler {e8,m1} } } */
 /* { dg-final { scan-assembler-not {csrr} } } */
-/* { dg-final { scan-tree-dump-times "Maximum lmul = 8" 1 "vect" } } */
-/* { dg-final { scan-tree-dump-times "Maximum lmul = 4" 1 "vect" } } */
-/* { dg-final { scan-tree-dump-times "Maximum lmul = 2" 1 "vect" } } */
-/* { dg-final { scan-tree-dump "Maximum lmul = 1" "vect" } } */
+/* { dg-final { scan-tree-dump-times "Preferring smaller LMUL loop because it has unexpected spills" 3 "vect" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul1-7.c b/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul1-7.c
@@ -42,7 +42,4 @@ foo (int32_t *__restrict a, int32_t *__restrict b, int32_t *__restrict c,
 
 /* { dg-final { scan-assembler {e32,m1} } } */
 /* { dg-final { scan-assembler-not {csrr} } } */
-/* { dg-final { scan-tree-dump-times "Maximum lmul = 8" 1 "vect" } } */
-/* { dg-final { scan-tree-dump-times "Maximum lmul = 4" 1 "vect" } } */
-/* { dg-final { scan-tree-dump-times "Maximum lmul = 2" 1 "vect" } } */
-/* { dg-final { scan-tree-dump-times "Maximum lmul = 1" 1 "vect" } } */
+/* { dg-final { scan-tree-dump-times "Preferring smaller LMUL loop because it has unexpected spills" 3 "vect" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul2-1.c b/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul2-1.c
@@ -45,7 +45,4 @@ foo (int32_t *__restrict a, int32_t *__restrict b,    int32_t *__restrict c,
 
 /* { dg-final { scan-assembler {e32,m2} } } */
 /* { dg-final { scan-assembler-not {csrr} } } */
-/* { dg-final { scan-tree-dump-times "Maximum lmul = 8" 1 "vect" } } */
-/* { dg-final { scan-tree-dump-times "Maximum lmul = 4" 1 "vect" } } */
-/* { dg-final { scan-tree-dump "Maximum lmul = 2" "vect" } } */
-/* { dg-final { scan-tree-dump-not "Maximum lmul = 1" "vect" } } */
+/* { dg-final { scan-tree-dump-times "Preferring smaller LMUL loop because it has unexpected spills" 2 "vect" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul2-2.c b/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul2-2.c
@@ -45,7 +45,4 @@ foo (int8_t *__restrict a, int8_t *__restrict b,    int8_t *__restrict c,
 
 /* { dg-final { scan-assembler {e8,m2} } } */
 /* { dg-final { scan-assembler-not {csrr} } } */
-/* { dg-final { scan-tree-dump-times "Maximum lmul = 8" 1 "vect" } } */
-/* { dg-final { scan-tree-dump-times "Maximum lmul = 4" 1 "vect" } } */
-/* { dg-final { scan-tree-dump "Maximum lmul = 2" "vect" } } */
-/* { dg-final { scan-tree-dump-not "Maximum lmul = 1" "vect" } } */
+/* { dg-final { scan-tree-dump-times "Preferring smaller LMUL loop because it has unexpected spills" 2 "vect" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul2-3.c b/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul2-3.c
@@ -45,7 +45,4 @@ foo (int32_t *__restrict a, int32_t *__restrict b,    int32_t *__restrict c,
 
 /* { dg-final { scan-assembler {e32,m2} } } */
 /* { dg-final { scan-assembler-not {csrr} } } */
-/* { dg-final { scan-tree-dump-times "Maximum lmul = 8" 1 "vect" } } */
-/* { dg-final { scan-tree-dump-times "Maximum lmul = 4" 1 "vect" } } */
-/* { dg-final { scan-tree-dump "Maximum lmul = 2" "vect" } } */
-/* { dg-final { scan-tree-dump-not "Maximum lmul = 1" "vect" } } */
+/* { dg-final { scan-tree-dump-times "Preferring smaller LMUL loop because it has unexpected spills" 2 "vect" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul2-4.c b/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul2-4.c
@@ -43,7 +43,4 @@ foo (int32_t *__restrict a, int32_t *__restrict b, int32_t *__restrict c,
 
 /* { dg-final { scan-assembler {e32,m2} } } */
 /* { dg-final { scan-assembler-not {csrr} } } */
-/* { dg-final { scan-tree-dump-times "Maximum lmul = 8" 1 "vect" } } */
-/* { dg-final { scan-tree-dump-times "Maximum lmul = 4" 1 "vect" } } */
-/* { dg-final { scan-tree-dump "Maximum lmul = 2" "vect" } } */
-/* { dg-final { scan-tree-dump-not "Maximum lmul = 1" "vect" } } */
+/* { dg-final { scan-tree-dump-times "Preferring smaller LMUL loop because it has unexpected spills" 2 "vect" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul2-5.c b/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul2-5.c
@@ -46,7 +46,4 @@ foo (int32_t *__restrict a, int32_t *__restrict b, int32_t *__restrict c,
 /* { dg-final { scan-assembler {e32,m2} } } */
 /* { dg-final { scan-assembler {e8,m8} } } */
 /* { dg-final { scan-assembler-not {csrr} } } */
-/* { dg-final { scan-tree-dump-times "Maximum lmul = 8" 1 "vect" } } */
-/* { dg-final { scan-tree-dump-times "Maximum lmul = 4" 1 "vect" } } */
-/* { dg-final { scan-tree-dump "Maximum lmul = 2" "vect" } } */
-/* { dg-final { scan-tree-dump-not "Maximum lmul = 1" "vect" } } */
+/* { dg-final { scan-tree-dump-times "Preferring smaller LMUL loop because it has unexpected spills" 2 "vect" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul2-6.c b/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul2-6.c
@@ -48,7 +48,4 @@ foo (int32_t *__restrict a, int32_t *__restrict b, int32_t *__restrict c,
 /* { dg-final { scan-assembler {e32,m2} } } */
 /* { dg-final { scan-assembler {e8,m8} } } */
 /* { dg-final { scan-assembler-not {csrr} } } */
-/* { dg-final { scan-tree-dump "Maximum lmul = 8" "vect" } } */
-/* { dg-final { scan-tree-dump-times "Maximum lmul = 4" 1 "vect" } } */
-/* { dg-final { scan-tree-dump "Maximum lmul = 2" "vect" } } */
-/* { dg-final { scan-tree-dump-not "Maximum lmul = 1" "vect" } } */
+/* { dg-final { scan-tree-dump-times "Preferring smaller LMUL loop because it has unexpected spills" 2 "vect" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul4-1.c b/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul4-1.c
@@ -29,7 +29,4 @@ foo (int32_t *__restrict a, int32_t *__restrict b, int32_t *__restrict c,
 
 /* { dg-final { scan-assembler {e32,m4} } } */
 /* { dg-final { scan-assembler-not {csrr} } } */
-/* { dg-final { scan-tree-dump-times "Maximum lmul = 8" 1 "vect" } } */
-/* { dg-final { scan-tree-dump "Maximum lmul = 4" "vect" } } */
-/* { dg-final { scan-tree-dump-not "Maximum lmul = 2" "vect" } } */
-/* { dg-final { scan-tree-dump-not "Maximum lmul = 1" "vect" } } */
+/* { dg-final { scan-tree-dump-times "Preferring smaller LMUL loop because it has unexpected spills" 1 "vect" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul4-10.c b/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul4-10.c
@@ -20,7 +20,4 @@ bar (int *x, int a, int b, int n)
 /* { dg-final { scan-assembler {e32,m4} } } */
 /* { dg-final { scan-assembler-not {jr} } } */
 /* { dg-final { scan-assembler-times {ret} 2 } } *
-/* { dg-final { scan-tree-dump-times "Maximum lmul = 8" 1 "vect" } } */
-/* { dg-final { scan-tree-dump "Maximum lmul = 4" "vect" } } */
-/* { dg-final { scan-tree-dump-not "Maximum lmul = 2" "vect" } } */
-/* { dg-final { scan-tree-dump-not "Maximum lmul = 1" "vect" } } */
+/* { dg-final { scan-tree-dump-times "Preferring smaller LMUL loop because it has unexpected spills" 1 "vect" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul4-2.c b/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul4-2.c
@@ -29,7 +29,4 @@ foo (int8_t *__restrict a, int8_t *__restrict b, int8_t *__restrict c,
 
 /* { dg-final { scan-assembler {e8,m4} } } */
 /* { dg-final { scan-assembler-not {csrr} } } */
-/* { dg-final { scan-tree-dump-times "Maximum lmul = 8" 1 "vect" } } */
-/* { dg-final { scan-tree-dump "Maximum lmul = 4" "vect" } } */
-/* { dg-final { scan-tree-dump-not "Maximum lmul = 2" "vect" } } */
-/* { dg-final { scan-tree-dump-not "Maximum lmul = 1" "vect" } } */
+/* { dg-final { scan-tree-dump-times "Preferring smaller LMUL loop because it has unexpected spills" 1 "vect" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul4-3.c b/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul4-3.c
@@ -41,7 +41,4 @@ void foo2 (int64_t *__restrict a,
 
 /* { dg-final { scan-assembler {e64,m4} } } */
 /* { dg-final { scan-assembler-not {csrr} } } */
-/* { dg-final { scan-tree-dump-times "Maximum lmul = 8" 1 "vect" } } */
-/* { dg-final { scan-tree-dump "Maximum lmul = 4" "vect" } } */
-/* { dg-final { scan-tree-dump-not "Maximum lmul = 2" "vect" } } */
-/* { dg-final { scan-tree-dump-not "Maximum lmul = 1" "vect" } } */
+/* { dg-final { scan-tree-dump-times "Preferring smaller LMUL loop because it has unexpected spills" 1 "vect" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul4-5.c b/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul4-5.c
@@ -41,7 +41,4 @@ void foo2 (int16_t *__restrict a,
 
 /* { dg-final { scan-assembler {e16,m2} } } */
 /* { dg-final { scan-assembler-not {csrr} } } */
-/* { dg-final { scan-tree-dump-times "Maximum lmul = 8" 1 "vect" } } */
-/* { dg-final { scan-tree-dump "Maximum lmul = 4" "vect" } } */
-/* { dg-final { scan-tree-dump-not "Maximum lmul = 2" "vect" } } */
-/* { dg-final { scan-tree-dump-not "Maximum lmul = 1" "vect" } } */
+/* { dg-final { scan-tree-dump-times "Preferring smaller LMUL loop because it has unexpected spills" 1 "vect" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul4-6.c b/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul4-6.c
@@ -21,7 +21,5 @@ foo (uint8_t *restrict a, uint8_t *restrict b, int n)
 
 /* { dg-final { scan-assembler {e8,m4} } } */
 /* { dg-final { scan-assembler-times {csrr} 1 } } */
-/* { dg-final { scan-tree-dump-not "Maximum lmul = 8" "vect" } } */
-/* { dg-final { scan-tree-dump "Maximum lmul = 4" "vect" } } */
-/* { dg-final { scan-tree-dump-not "Maximum lmul = 2" "vect" } } */
-/* { dg-final { scan-tree-dump-not "Maximum lmul = 1" "vect" } } */
+/* Since we don't support VLA SLP for LMUL = 8, dynamic LMUL cost model start from LMUL = 4.  */
+/* { dg-final { scan-tree-dump-not "Preferring smaller LMUL loop because it has unexpected spills" "vect" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul4-7.c b/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul4-7.c
@@ -41,7 +41,4 @@ void foo2 (int8_t *__restrict a,
 
 /* { dg-final { scan-assembler {e64,m4} } } */
 /* { dg-final { scan-assembler-not {csrr} } } */
-/* { dg-final { scan-tree-dump-times "Maximum lmul = 8" 1 "vect" } } */
-/* { dg-final { scan-tree-dump "Maximum lmul = 4" "vect" } } */
-/* { dg-final { scan-tree-dump-not "Maximum lmul = 2" "vect" } } */
-/* { dg-final { scan-tree-dump-not "Maximum lmul = 1" "vect" } } */
+/* { dg-final { scan-tree-dump-times "Preferring smaller LMUL loop because it has unexpected spills" 1 "vect" } } */