From 6792165f4510d61067f84dacc5d627485b25321f Mon Sep 17 00:00:00 2001 From: Loser Cheems Date: Tue, 1 Jul 2025 11:16:54 +0800 Subject: [PATCH] Updates benchmark test configurations for better coverage Adjusts query and key lengths in test configurations to provide more balanced testing scenarios. Changes small sequence length tests from 4 to 64 tokens to better represent realistic use cases, and modifies the largest configuration to use matching sequence lengths with non-causal attention for improved test diversity. --- benchmarks/benchmark_forward_equivalence.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/benchmarks/benchmark_forward_equivalence.py b/benchmarks/benchmark_forward_equivalence.py index 39e64e0..2c73a53 100644 --- a/benchmarks/benchmark_forward_equivalence.py +++ b/benchmarks/benchmark_forward_equivalence.py @@ -357,8 +357,8 @@ def test_forward_equivalence(accuracy_threshold=0.95): # If you encounter NAN issues when running multiple configurations, try running a single configuration test_configs = [ # (batch_size, num_heads, num_kv_heads, query_len, key_len, head_dim, is_causal) - (1, 1, 1, 4, 64, 32, True), - (1, 1, 1, 4, 64, 32, False), + (1, 1, 1, 64, 64, 32, True), + (1, 1, 1, 64, 64, 32, False), (1, 1, 1, 128, 128, 32, True), (1, 1, 1, 128, 128, 32, False), (1, 1, 1, 256, 256, 32, True), @@ -377,7 +377,7 @@ def test_forward_equivalence(accuracy_threshold=0.95): (1, 2, 1, 64, 64, 128, True), (1, 2, 1, 128, 128, 128, True), (1, 2, 1, 256, 256, 128, True), - (1, 2, 1, 511, 512, 128, True), + (1, 2, 1, 512, 512, 128, False), ] device = torch.device("cuda" if torch.cuda.is_available() else "cpu")