Fix threshold matching for benchmark summary (#13011)
The key of the results map is the artificial benchmark ID instead of the
benchmark name. Change the code to use the benchmark name when matching thresholds.

We forgot to update this part when we switched to keying results by the
artificial benchmark ID.
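For illustration, here is a minimal, self-contained sketch of the bug and the fix. The dataclasses below are simplified stand-ins for the real ones in benchmark_presentation.py and benchmark_thresholds.py, and the benchmark name, series ID, and threshold value are hypothetical.

```python
import re
from dataclasses import dataclass
from typing import Dict, Optional


# Simplified stand-ins for the real dataclasses; the actual definitions live in
# benchmark_presentation.py and benchmark_thresholds.py.
@dataclass(frozen=True)
class BenchmarkThreshold:
  regex: re.Pattern
  threshold: int  # Interpreted here as a percentage.


@dataclass(frozen=True)
class AggregateBenchmarkLatency:
  name: str
  mean_time: int
  base_mean_time: Optional[int] = None

  def __str__(self) -> str:
    return self.name


# Thresholds are written against benchmark *names*.
THRESHOLDS = [BenchmarkThreshold(regex=re.compile(r".*MobileNet.*"), threshold=10)]

# The results map is keyed by an artificial series ID, not the benchmark name.
results: Dict[str, AggregateBenchmarkLatency] = {
    "0a1b2c3d-series-id": AggregateBenchmarkLatency(
        name="MobileNetV2 [fp32] (Pixel 6)", mean_time=105, base_mean_time=100),
}

for series_id, metrics_obj in results.items():
  # Buggy behavior: matching the regex against the map key (artificial ID) never succeeds.
  assert not any(t.regex.match(series_id) for t in THRESHOLDS)
  # Fixed behavior: match against the benchmark name exposed via __str__.
  assert any(t.regex.match(str(metrics_obj)) for t in THRESHOLDS)
```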
Jerry Wu authored and jpienaar committed May 1, 2023
1 parent c6fd46d commit c790a97
Showing 1 changed file with 20 additions and 21 deletions.
41 changes: 20 additions & 21 deletions build_tools/benchmarks/common/benchmark_presentation.py
@@ -18,7 +18,6 @@
from common.benchmark_thresholds import BENCHMARK_THRESHOLDS, COMPILATION_TIME_THRESHOLDS, TOTAL_ARTIFACT_SIZE_THRESHOLDS, TOTAL_DISPATCH_SIZE_THRESHOLDS, BenchmarkThreshold, ThresholdUnit

GetMetricFunc = Callable[[Any], Tuple[int, Optional[int]]]
-GetMetricsObjNameFunc = Callable[[Any], str]

PERFBOARD_SERIES_PREFIX = "https://perf.iree.dev/serie?IREE?"
BENCHMARK_RESULTS_HEADERS = [
@@ -48,6 +47,9 @@ class AggregateBenchmarkLatency:
# The average latency time for the base commit to compare against.
base_mean_time: Optional[int] = None

+def __str__(self) -> str:
+  return self.name
+

@dataclass(frozen=True)
class CompilationMetrics:
@@ -61,6 +63,9 @@ class CompilationMetrics:
base_total_artifact_bytes: Optional[int] = None
base_total_dispatch_component_bytes: Optional[int] = None

+def __str__(self) -> str:
+  return self.name
+

T = TypeVar("T")

@@ -394,7 +399,7 @@ def _categorize_on_single_metric(
raw group (the group with no base to compare to).
Args:
-metrics_map: map of (name, metrics object).
+metrics_map: map of (series_id, metrics object).
metric_func: the function returns current and base value of the metric.
thresholds: list of threshold settings to match for categorizing.
Returns:
@@ -405,19 +410,20 @@
improved_map = {}
similar_map = {}
raw_map = {}
-for name, metrics_obj in metrics_map.items():
+for series_id, metrics_obj in metrics_map.items():
current, base = metric_func(metrics_obj)
if base is None:
-raw_map[name] = metrics_obj
+raw_map[series_id] = metrics_obj
continue

+series_name = str(metrics_obj)
similar_threshold = None
for threshold in thresholds:
-if threshold.regex.match(name):
+if threshold.regex.match(series_name):
similar_threshold = threshold
break
if similar_threshold is None:
-raise ValueError(f"No matched threshold setting for: {name}")
+raise ValueError(f"No matched threshold setting for: {series_name}")

if similar_threshold.unit == ThresholdUnit.PERCENTAGE:
ratio = abs(current - base) / base * 100
@@ -429,11 +435,11 @@
)

if ratio <= similar_threshold.threshold:
-similar_map[name] = metrics_obj
+similar_map[series_id] = metrics_obj
elif current > base:
-regressed_map[name] = metrics_obj
+regressed_map[series_id] = metrics_obj
else:
-improved_map[name] = metrics_obj
+improved_map[series_id] = metrics_obj

return (regressed_map, improved_map, similar_map, raw_map)

@@ -528,20 +534,16 @@ def categorize_benchmarks_into_tables(benchmarks: Dict[
return "\n\n".join(tables)


-def _sort_metrics_objects_and_get_table(
-    metrics_objs: Dict[str, T],
-    metrics_obj_name_func: GetMetricsObjNameFunc,
-    mapper: MetricsToTableMapper[T],
-    headers: Sequence[str],
-    size_cut: Optional[int] = None) -> str:
+def _sort_metrics_objects_and_get_table(metrics_objs: Dict[str, T],
+                                        mapper: MetricsToTableMapper[T],
+                                        headers: Sequence[str],
+                                        size_cut: Optional[int] = None) -> str:
"""Sorts all metrics objects according to the improvement/regression ratio and
returns a markdown table for it.
Args:
metrics_objs: map of (target_id, CompilationMetrics). All objects must
contain base value.
-metrics_obj_name_func: function that returns the display name of a metrics
-object.
mapper: MetricsToTableMapper for metrics_objs.
headers: list of table headers.
size_cut: If not None, only show the top N results for each table.
@@ -553,8 +555,7 @@ def _sort_metrics_objects_and_get_table(
raise AssertionError("Base can't be None for sorting.")
ratio = abs(current - base) / base
sorted_rows.append((ratio, (
-_make_series_link(metrics_obj_name_func(metrics_obj),
-                  mapper.get_series_id(target_id)),
+_make_series_link(str(metrics_obj), mapper.get_series_id(target_id)),
_get_compare_text(current, base),
)))
sorted_rows.sort(key=lambda row: row[0], reverse=True)
@@ -591,7 +592,6 @@ def categorize_compilation_metrics_into_tables(
tables.append(
_sort_metrics_objects_and_get_table(
metrics_objs=regressed,
-metrics_obj_name_func=lambda obj: obj.name,
mapper=mapper,
headers=["Benchmark Name", table_header],
size_cut=size_cut))
@@ -600,7 +600,6 @@
tables.append(
_sort_metrics_objects_and_get_table(
metrics_objs=improved,
-metrics_obj_name_func=lambda obj: obj.name,
mapper=mapper,
headers=["Benchmark Name", table_header],
size_cut=size_cut))
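For reference, a standalone sketch of the percentage-threshold bucketing performed by _categorize_on_single_metric above; the 5% threshold and the metric values are hypothetical, and non-percentage threshold units handled by the real code are omitted.

```python
# Illustration of the percentage-threshold bucketing in _categorize_on_single_metric.
# Threshold and values are hypothetical; higher values count as regressions,
# matching the logic in the diff above.
def categorize(current: int, base: int, threshold_pct: float) -> str:
  ratio = abs(current - base) / base * 100
  if ratio <= threshold_pct:
    return "similar"
  return "regressed" if current > base else "improved"


assert categorize(current=104, base=100, threshold_pct=5) == "similar"
assert categorize(current=120, base=100, threshold_pct=5) == "regressed"
assert categorize(current=80, base=100, threshold_pct=5) == "improved"
```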