diff --git a/build_tools/benchmarks/common/benchmark_presentation.py b/build_tools/benchmarks/common/benchmark_presentation.py
index 756bcbeb720a0..104547d58bbf1 100644
--- a/build_tools/benchmarks/common/benchmark_presentation.py
+++ b/build_tools/benchmarks/common/benchmark_presentation.py
@@ -18,7 +18,6 @@ from common.benchmark_thresholds import BENCHMARK_THRESHOLDS, COMPILATION_TIME_THRESHOLDS, TOTAL_ARTIFACT_SIZE_THRESHOLDS, TOTAL_DISPATCH_SIZE_THRESHOLDS, BenchmarkThreshold, ThresholdUnit
 
 
 GetMetricFunc = Callable[[Any], Tuple[int, Optional[int]]]
-GetMetricsObjNameFunc = Callable[[Any], str]
 
 PERFBOARD_SERIES_PREFIX = "https://perf.iree.dev/serie?IREE?"
 BENCHMARK_RESULTS_HEADERS = [
@@ -48,6 +47,9 @@ class AggregateBenchmarkLatency:
   # The average latency time for the base commit to compare against.
   base_mean_time: Optional[int] = None
 
+  def __str__(self) -> str:
+    return self.name
+
 
 @dataclass(frozen=True)
 class CompilationMetrics:
@@ -61,6 +63,9 @@ class CompilationMetrics:
   base_total_artifact_bytes: Optional[int] = None
   base_total_dispatch_component_bytes: Optional[int] = None
 
+  def __str__(self) -> str:
+    return self.name
+
 
 T = TypeVar("T")
 
@@ -394,7 +399,7 @@ def _categorize_on_single_metric(
     raw group (the group with no base to compare to).
 
   Args:
-    metrics_map: map of (name, metrics object).
+    metrics_map: map of (series_id, metrics object).
    metric_func: the function returns current and base value of the metric.
    thresholds: list of threshold settings to match for categorizing.
  Returns:
@@ -405,19 +410,20 @@ def _categorize_on_single_metric(
   improved_map = {}
   similar_map = {}
   raw_map = {}
-  for name, metrics_obj in metrics_map.items():
+  for series_id, metrics_obj in metrics_map.items():
     current, base = metric_func(metrics_obj)
     if base is None:
-      raw_map[name] = metrics_obj
+      raw_map[series_id] = metrics_obj
       continue
 
+    series_name = str(metrics_obj)
     similar_threshold = None
     for threshold in thresholds:
-      if threshold.regex.match(name):
+      if threshold.regex.match(series_name):
         similar_threshold = threshold
         break
     if similar_threshold is None:
-      raise ValueError(f"No matched threshold setting for: {name}")
+      raise ValueError(f"No matched threshold setting for: {series_name}")
 
     if similar_threshold.unit == ThresholdUnit.PERCENTAGE:
       ratio = abs(current - base) / base * 100
@@ -429,11 +435,11 @@
       )
 
     if ratio <= similar_threshold.threshold:
-      similar_map[name] = metrics_obj
+      similar_map[series_id] = metrics_obj
     elif current > base:
-      regressed_map[name] = metrics_obj
+      regressed_map[series_id] = metrics_obj
     else:
-      improved_map[name] = metrics_obj
+      improved_map[series_id] = metrics_obj
 
   return (regressed_map, improved_map, similar_map, raw_map)
 
@@ -528,20 +534,16 @@ def categorize_benchmarks_into_tables(benchmarks: Dict[
   return "\n\n".join(tables)
 
 
-def _sort_metrics_objects_and_get_table(
-    metrics_objs: Dict[str, T],
-    metrics_obj_name_func: GetMetricsObjNameFunc,
-    mapper: MetricsToTableMapper[T],
-    headers: Sequence[str],
-    size_cut: Optional[int] = None) -> str:
+def _sort_metrics_objects_and_get_table(metrics_objs: Dict[str, T],
+                                        mapper: MetricsToTableMapper[T],
+                                        headers: Sequence[str],
+                                        size_cut: Optional[int] = None) -> str:
   """Sorts all metrics objects according to the improvement/regression ratio
   and returns a markdown table for it.
 
   Args:
     metrics_objs: map of (target_id, CompilationMetrics). All objects must
       contain base value.
-    metrics_obj_name_func: function that returns the display name of a metrics
-      object.
     mapper: MetricsToTableMapper for metrics_objs.
     headers: list of table headers.
     size_cut: If not None, only show the top N results for each table.
@@ -553,8 +555,7 @@ def _sort_metrics_objects_and_get_table(
       raise AssertionError("Base can't be None for sorting.")
     ratio = abs(current - base) / base
     sorted_rows.append((ratio, (
-        _make_series_link(metrics_obj_name_func(metrics_obj),
-                          mapper.get_series_id(target_id)),
+        _make_series_link(str(metrics_obj), mapper.get_series_id(target_id)),
         _get_compare_text(current, base),
     )))
   sorted_rows.sort(key=lambda row: row[0], reverse=True)
@@ -591,7 +592,6 @@ def categorize_compilation_metrics_into_tables(
     tables.append(
         _sort_metrics_objects_and_get_table(
             metrics_objs=regressed,
-            metrics_obj_name_func=lambda obj: obj.name,
             mapper=mapper,
             headers=["Benchmark Name", table_header],
             size_cut=size_cut))
@@ -600,7 +600,6 @@ def categorize_compilation_metrics_into_tables(
     tables.append(
         _sort_metrics_objects_and_get_table(
             metrics_objs=improved,
-            metrics_obj_name_func=lambda obj: obj.name,
             mapper=mapper,
             headers=["Benchmark Name", table_header],
             size_cut=size_cut))
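
A minimal sketch (not part of the patch) of the pattern this change adopts: categorization maps are keyed by a stable series_id, while threshold regexes are matched against the display name exposed by the new __str__ methods. SampleMetrics and the example data below are hypothetical stand-ins, not this module's real CompilationMetrics or BenchmarkThreshold types.

import re
from dataclasses import dataclass


@dataclass(frozen=True)
class SampleMetrics:
  # Hypothetical stand-in for CompilationMetrics / AggregateBenchmarkLatency.
  name: str
  current: int
  base: int

  def __str__(self) -> str:
    return self.name


# Keyed by series_id, matching the new metrics_map contract.
metrics_map = {
    "abc123-compile-time": SampleMetrics("MobileNet compile time", 110, 100),
}
# (regex matched against the display name, threshold in percent)
thresholds = [(re.compile(r".*compile time"), 5.0)]

regressed = {}
for series_id, obj in metrics_map.items():
  series_name = str(obj)  # display name is only used for threshold matching
  threshold = next(t for regex, t in thresholds if regex.match(series_name))
  ratio = abs(obj.current - obj.base) / obj.base * 100
  if ratio > threshold and obj.current > obj.base:
    regressed[series_id] = obj  # results stay keyed by series_id

print(regressed)  # {'abc123-compile-time': SampleMetrics(name='MobileNet compile time', ...)}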