Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
27 changes: 13 additions & 14 deletions benchmarks/backward_equivalence.py
Original file line number Diff line number Diff line change
Expand Up @@ -598,23 +598,22 @@ def test_cuda_backward_equivalence(accuracy_threshold=0.95):
(1, 2, 1, 4096, 4096, 96, True),
(1, 2, 1, 4096, 4096, 96, False),

# head_dim >= 128 is not supported on sm89 yet
# Because fwd uses splitkv branch, this branch does not support head_dim>=128 for now
# Head dim 128
# (1, 2, 1, 128, 128, 128, True),
# (1, 2, 1, 128, 128, 128, True),
# (1, 2, 1, 256, 256, 128, True),
# (1, 2, 1, 256, 256, 128, False),
# (1, 2, 1, 512, 512, 128, True),
# (1, 2, 1, 512, 512, 128, False),
# (1, 2, 1, 1024, 1024, 128, True),
# (1, 2, 1, 1024, 1024, 128, False),
# (1, 2, 1, 2048, 2048, 128, True),
# (1, 2, 1, 2048, 2048, 128, False),
# (1, 2, 1, 4096, 4096, 128, True),
# (1, 2, 1, 4096, 4096, 128, False),
(1, 2, 1, 128, 128, 128, True),
(1, 2, 1, 128, 128, 128, False),
Comment on lines +602 to +603
Copy link

Copilot AI Aug 29, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Line 603 should have False as the last parameter, but the original commented-out version of this line had True. That is inconsistent: the (1, 2, 1, 128, 128, 128) configuration ends up with two test cases using True and none using False.

Copilot uses AI. Check for mistakes.
(1, 2, 1, 256, 256, 128, True),
(1, 2, 1, 256, 256, 128, False),
(1, 2, 1, 512, 512, 128, True),
(1, 2, 1, 512, 512, 128, False),
(1, 2, 1, 1024, 1024, 128, True),
(1, 2, 1, 1024, 1024, 128, False),
(1, 2, 1, 2048, 2048, 128, True),
(1, 2, 1, 2048, 2048, 128, False),
(1, 2, 1, 4096, 4096, 128, True),
(1, 2, 1, 4096, 4096, 128, False),

# Head dim 256
# Because fwd uses splitkv branch, this branch does not support head_dim=256 for now
# For head_dim=256, in addition to the splitkv branch limitation, bwd itself does not support it because there is not enough shared memory
# (1, 2, 1, 128, 128, 256, True),
# (1, 2, 1, 128, 128, 256, False),
Expand Down
28 changes: 14 additions & 14 deletions benchmarks/forward_equivalence.py
Original file line number Diff line number Diff line change
Expand Up @@ -561,22 +561,22 @@ def test_cuda_forward_equivalence(accuracy_threshold=0.95):
(1, 2, 1, 4096, 4096, 96, True),
(1, 2, 1, 4096, 4096, 96, False),

# head_dim >= 128 is not supported on sm89 yet
# Because fwd uses splitkv branch by default, and shared memory is not enough for sm89
# Head dim 128
# (1, 2, 1, 128, 128, 128, True),
# (1, 2, 1, 128, 128, 128, False),
# (1, 2, 1, 256, 256, 128, True),
# (1, 2, 1, 256, 256, 128, False),
# (1, 2, 1, 512, 512, 128, True),
# (1, 2, 1, 512, 512, 128, False),
# (1, 2, 1, 1024, 1024, 128, True),
# (1, 2, 1, 1024, 1024, 128, False),
# (1, 2, 1, 2048, 2048, 128, True),
# (1, 2, 1, 2048, 2048, 128, False),
# (1, 2, 1, 4096, 4096, 128, True),
# (1, 2, 1, 4096, 4096, 128, False),
(1, 2, 1, 128, 128, 128, True),
(1, 2, 1, 128, 128, 128, False),
(1, 2, 1, 256, 256, 128, True),
(1, 2, 1, 256, 256, 128, False),
(1, 2, 1, 512, 512, 128, True),
(1, 2, 1, 512, 512, 128, False),
(1, 2, 1, 1024, 1024, 128, True),
(1, 2, 1, 1024, 1024, 128, False),
(1, 2, 1, 2048, 2048, 128, True),
(1, 2, 1, 2048, 2048, 128, False),
(1, 2, 1, 4096, 4096, 128, True),
(1, 2, 1, 4096, 4096, 128, False),

# head_dim = 256 is not supported on sm89 yet
# Because fwd uses splitkv branch by default, and shared memory is not enough for sm89
# Head dim 256
# (1, 2, 1, 128, 128, 256, True),
# (1, 2, 1, 128, 128, 256, False),
Expand Down