|
9 | 9 | import subprocess
|
10 | 10 | import sys
|
11 | 11 | from collections import defaultdict
|
| 12 | +from collections.abc import Generator |
12 | 13 | from datetime import datetime
|
13 | 14 | from pathlib import Path
|
14 |
| -from collections.abc import Generator |
15 | 15 |
|
16 | 16 | import click
|
17 | 17 | import multiprocessing_logging
|
@@ -55,10 +55,10 @@ def print_model_results(model_results: dict[str, list[ExecResult]]):
|
55 | 55 | run_tokens = len_tokens(result.run_stdout)
|
56 | 56 | result_total_tokens = gen_tokens + run_tokens
|
57 | 57 | print(
|
58 |
| - f"{checkmark} {result.name}: {duration_result:.2f}s/{result_total_tokens}tok " |
59 |
| - f"(gen: {result.timings['gen']:.2f}s/{gen_tokens}tok, " |
60 |
| - f"run: {result.timings['run']:.2f}s/{run_tokens}tok, " |
61 |
| - f"eval: {result.timings['eval']:.2f}s)" |
| 58 | + f"{checkmark} {result.name}: {duration_result:.0f}s/{result_total_tokens}tok " |
| 59 | + f"(gen: {result.timings['gen']:.0f}s/{gen_tokens}tok, " |
| 60 | + f"run: {result.timings['run']:.0f}s/{run_tokens}tok, " |
| 61 | + f"eval: {result.timings['eval']:.0f}s)" |
62 | 62 | )
|
63 | 63 | for case in cases:
|
64 | 64 | checkmark = "✅" if case.passed else "❌"
|
@@ -92,7 +92,7 @@ def print_model_results_table(model_results: dict[str, list[ExecResult]]):
|
92 | 92 | row.append(f"{checkmark} {reason}")
|
93 | 93 | else:
|
94 | 94 | row.append(
|
95 |
| - f"{checkmark} {duration:.2f}s/{gen_tokens+run_tokens}tok" |
| 95 | + f"{checkmark} {duration:.0f}s/{gen_tokens+run_tokens}tok" |
96 | 96 | )
|
97 | 97 | except StopIteration:
|
98 | 98 | row.append("❌ N/A")
|
|
0 commit comments