diff --git a/build_tools/benchmarks/common/benchmark_presentation.py b/build_tools/benchmarks/common/benchmark_presentation.py
index 756bcbeb720a0..104547d58bbf1 100644
--- a/build_tools/benchmarks/common/benchmark_presentation.py
+++ b/build_tools/benchmarks/common/benchmark_presentation.py
@@ -18,7 +18,6 @@ from common.benchmark_thresholds import BENCHMARK_THRESHOLDS, COMPILATION_TIME_THRESHOLDS, TOTAL_ARTIFACT_SIZE_THRESHOLDS, TOTAL_DISPATCH_SIZE_THRESHOLDS, BenchmarkThreshold, ThresholdUnit
 
 
 GetMetricFunc = Callable[[Any], Tuple[int, Optional[int]]]
-GetMetricsObjNameFunc = Callable[[Any], str]
 
 PERFBOARD_SERIES_PREFIX = "https://perf.iree.dev/serie?IREE?"
 BENCHMARK_RESULTS_HEADERS = [
@@ -48,6 +47,9 @@ class AggregateBenchmarkLatency:
   # The average latency time for the base commit to compare against.
   base_mean_time: Optional[int] = None
 
+  def __str__(self) -> str:
+    return self.name
+
 
 @dataclass(frozen=True)
 class CompilationMetrics:
@@ -61,6 +63,9 @@ class CompilationMetrics:
   base_total_artifact_bytes: Optional[int] = None
   base_total_dispatch_component_bytes: Optional[int] = None
 
+  def __str__(self) -> str:
+    return self.name
+
 
 T = TypeVar("T")
 
@@ -394,7 +399,7 @@ def _categorize_on_single_metric(
     raw group (the group with no base to compare to).
 
   Args:
-    metrics_map: map of (name, metrics object).
+    metrics_map: map of (series_id, metrics object).
    metric_func: the function returns current and base value of the metric.
    thresholds: list of threshold settings to match for categorizing.
  Returns:
@@ -405,19 +410,20 @@ def _categorize_on_single_metric(
   improved_map = {}
   similar_map = {}
   raw_map = {}
-  for name, metrics_obj in metrics_map.items():
+  for series_id, metrics_obj in metrics_map.items():
     current, base = metric_func(metrics_obj)
     if base is None:
-      raw_map[name] = metrics_obj
+      raw_map[series_id] = metrics_obj
       continue
 
+    series_name = str(metrics_obj)
     similar_threshold = None
     for threshold in thresholds:
-      if threshold.regex.match(name):
+      if threshold.regex.match(series_name):
         similar_threshold = threshold
         break
     if similar_threshold is None:
-      raise ValueError(f"No matched threshold setting for: {name}")
+      raise ValueError(f"No matched threshold setting for: {series_name}")
 
     if similar_threshold.unit == ThresholdUnit.PERCENTAGE:
       ratio = abs(current - base) / base * 100
@@ -429,11 +435,11 @@
       )
 
     if ratio <= similar_threshold.threshold:
-      similar_map[name] = metrics_obj
+      similar_map[series_id] = metrics_obj
     elif current > base:
-      regressed_map[name] = metrics_obj
+      regressed_map[series_id] = metrics_obj
     else:
-      improved_map[name] = metrics_obj
+      improved_map[series_id] = metrics_obj
 
   return (regressed_map, improved_map, similar_map, raw_map)
 
@@ -528,20 +534,16 @@ def categorize_benchmarks_into_tables(benchmarks: Dict[
   return "\n\n".join(tables)
 
 
-def _sort_metrics_objects_and_get_table(
-    metrics_objs: Dict[str, T],
-    metrics_obj_name_func: GetMetricsObjNameFunc,
-    mapper: MetricsToTableMapper[T],
-    headers: Sequence[str],
-    size_cut: Optional[int] = None) -> str:
+def _sort_metrics_objects_and_get_table(metrics_objs: Dict[str, T],
+                                        mapper: MetricsToTableMapper[T],
+                                        headers: Sequence[str],
+                                        size_cut: Optional[int] = None) -> str:
   """Sorts all metrics objects according to the improvement/regression ratio
   and returns a markdown table for it.
 
   Args:
     metrics_objs: map of (target_id, CompilationMetrics). All objects must
       contain base value.
-    metrics_obj_name_func: function that returns the display name of a metrics
-      object.
     mapper: MetricsToTableMapper for metrics_objs.
     headers: list of table headers.
     size_cut: If not None, only show the top N results for each table.
@@ -553,8 +555,7 @@ def _sort_metrics_objects_and_get_table(
       raise AssertionError("Base can't be None for sorting.")
     ratio = abs(current - base) / base
     sorted_rows.append((ratio, (
-        _make_series_link(metrics_obj_name_func(metrics_obj),
-                          mapper.get_series_id(target_id)),
+        _make_series_link(str(metrics_obj), mapper.get_series_id(target_id)),
         _get_compare_text(current, base),
     )))
   sorted_rows.sort(key=lambda row: row[0], reverse=True)
@@ -591,7 +592,6 @@ def categorize_compilation_metrics_into_tables(
     tables.append(
         _sort_metrics_objects_and_get_table(
             metrics_objs=regressed,
-            metrics_obj_name_func=lambda obj: obj.name,
             mapper=mapper,
             headers=["Benchmark Name", table_header],
             size_cut=size_cut))
@@ -600,7 +600,6 @@ def categorize_compilation_metrics_into_tables(
     tables.append(
         _sort_metrics_objects_and_get_table(
             metrics_objs=improved,
-            metrics_obj_name_func=lambda obj: obj.name,
             mapper=mapper,
             headers=["Benchmark Name", table_header],
             size_cut=size_cut))
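
A minimal sketch (not part of the patch) of the pattern this change adopts: categorization maps are keyed by a stable series_id, while threshold regexes are matched against the display name exposed by the new __str__ methods. SampleMetrics and the example data below are hypothetical stand-ins, not this module's real CompilationMetrics or BenchmarkThreshold types.

import re
from dataclasses import dataclass


@dataclass(frozen=True)
class SampleMetrics:
  # Hypothetical stand-in for CompilationMetrics / AggregateBenchmarkLatency.
  name: str
  current: int
  base: int

  def __str__(self) -> str:
    return self.name


# Keyed by series_id, matching the new metrics_map contract.
metrics_map = {
    "abc123-compile-time": SampleMetrics("MobileNet compile time", 110, 100),
}
# (regex matched against the display name, threshold in percent)
thresholds = [(re.compile(r".*compile time"), 5.0)]

regressed = {}
for series_id, obj in metrics_map.items():
  series_name = str(obj)  # display name is only used for threshold matching
  threshold = next(t for regex, t in thresholds if regex.match(series_name))
  ratio = abs(obj.current - obj.base) / obj.base * 100
  if ratio > threshold and obj.current > obj.base:
    regressed[series_id] = obj  # results stay keyed by series_id

print(regressed)  # {'abc123-compile-time': SampleMetrics(name='MobileNet compile time', ...)}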