diff --git a/README.md b/README.md index d0f4aa9d6..5fb1740a9 100644 --- a/README.md +++ b/README.md @@ -197,6 +197,7 @@ Q4 requires the larger-memory machine class, so M3 Max Q4 numbers are `N/A`. | Mac Studio M3 Ultra, 512 GB | q4 | 12018 tokens | 448.82 t/s | 26.62 t/s | | Mac Studio M3 Ultra, 512 GB | PRO q2 | 32768 tokens | 138.82 t/s | 9.56 t/s | | DGX Spark GB10, 128 GB | q2 | 7047 tokens | 343.81 t/s | 13.75 t/s | +| NVIDIA RTX PRO 6000 Blackwell, 96 GB VRAM | q2-imatrix | 8192 tokens | 317.34 t/s | 33.85 t/s | ![M3 Max t/s](speed-bench/m3_max_ts.svg) ![PRO model M3 Ultra t/s](speed-bench/pro_model_m3_ultra_ts.svg) diff --git a/speed-bench/rtx_pro_6000_blackwell.csv b/speed-bench/rtx_pro_6000_blackwell.csv new file mode 100644 index 000000000..3c55cd837 --- /dev/null +++ b/speed-bench/rtx_pro_6000_blackwell.csv @@ -0,0 +1,33 @@ +ctx_tokens,prefill_tokens,prefill_tps,gen_tokens,gen_tps,kvcache_bytes +2048,2048,319.92,128,36.34,52184460 +4096,2048,318.59,128,33.79,80373132 +6144,2048,317.86,128,34.46,108561804 +8192,2048,317.35,128,33.83,136750476 +10240,2048,316.51,128,33.59,164939148 +12288,2048,315.90,128,33.45,193127820 +14336,2048,315.27,128,33.36,221316492 +16384,2048,315.05,128,33.91,249505164 +18432,2048,314.45,128,33.75,277693836 +20480,2048,313.85,128,33.57,305882508 +22528,2048,313.32,128,33.51,334071180 +24576,2048,312.75,128,33.36,362259852 +26624,2048,312.16,128,33.21,390448524 +28672,2048,311.54,128,33.05,418637196 +30720,2048,311.07,128,32.93,446825868 +32768,2048,310.46,128,31.14,475014540 +34816,2048,307.87,128,30.59,503203212 +36864,2048,307.33,128,30.48,531391884 +38912,2048,306.72,128,30.45,559580556 +40960,2048,306.12,128,30.38,587769228 +43008,2048,305.55,128,30.34,615957900 +45056,2048,305.01,128,30.22,644146572 +47104,2048,304.40,128,30.20,672335244 +49152,2048,303.86,128,30.12,700523916 +51200,2048,302.83,128,30.00,728712588 +53248,2048,302.27,128,29.86,756901260 +55296,2048,301.72,128,29.78,785089932 +57344,2048,301.18,128,29.71,813278604 +59392,2048,300.58,128,29.61,841467276 +61440,2048,300.04,128,29.47,869655948 +63488,2048,299.50,128,29.31,897844620 +65536,2048,298.97,128,29.20,926033292 diff --git a/speed-bench/rtx_pro_6000_blackwell_ts.svg b/speed-bench/rtx_pro_6000_blackwell_ts.svg new file mode 100644 index 000000000..891b3c7d9 --- /dev/null +++ b/speed-bench/rtx_pro_6000_blackwell_ts.svg @@ -0,0 +1,48 @@ + + + + +RTX PRO 6000 Blackwell t/s + +0 + +100 + +200 + +300 + +400 +0 +10 +20 +30 +40 + +0 + +20k + +40k + +60k + + + +ctx size +prefill t/s +generation t/s + + + + +prefill + +generation +