@@ -46,24 +46,18 @@ steps:
46
46
mirror_hardwares : [amdexperimental]
47
47
source_file_dependencies :
48
48
- vllm/
49
- - tests/mq_llm_engine
50
- - tests/async_engine
51
49
- tests/test_inputs.py
52
50
- tests/test_outputs.py
53
51
- tests/multimodal
54
52
- tests/utils_
55
- - tests/worker
56
53
- tests/standalone_tests/lazy_imports.py
57
54
- tests/transformers_utils
58
55
commands :
59
56
- python3 standalone_tests/lazy_imports.py
60
- - pytest -v -s mq_llm_engine # MQLLMEngine
61
- - pytest -v -s async_engine # AsyncLLMEngine
62
57
- pytest -v -s test_inputs.py
63
58
- pytest -v -s test_outputs.py
64
59
- pytest -v -s multimodal
65
60
- pytest -v -s utils_ # Utils
66
- - pytest -v -s worker # Worker
67
61
- pytest -v -s transformers_utils # transformers_utils
68
62
69
63
- label : Python-only Installation Test # 10min
@@ -84,25 +78,12 @@ steps:
84
78
- vllm/
85
79
- tests/basic_correctness/test_basic_correctness
86
80
- tests/basic_correctness/test_cpu_offload
87
- - tests/basic_correctness/test_preemption
88
81
- tests/basic_correctness/test_cumem.py
89
82
commands :
90
83
- export VLLM_WORKER_MULTIPROC_METHOD=spawn
91
84
- pytest -v -s basic_correctness/test_cumem.py
92
85
- pytest -v -s basic_correctness/test_basic_correctness.py
93
86
- pytest -v -s basic_correctness/test_cpu_offload.py
94
- - VLLM_TEST_ENABLE_ARTIFICIAL_PREEMPT=1 pytest -v -s basic_correctness/test_preemption.py
95
-
96
- - label : Core Test # 22min
97
- timeout_in_minutes : 35
98
- mirror_hardwares : [amdexperimental]
99
- fast_check : true
100
- source_file_dependencies :
101
- - vllm/core
102
- - vllm/distributed
103
- - tests/core
104
- commands :
105
- - pytest -v -s core
106
87
107
88
- label : Entrypoints Unit Tests # 5min
108
89
timeout_in_minutes : 10
@@ -230,16 +211,14 @@ steps:
230
211
num_gpus : 2
231
212
source_file_dependencies :
232
213
- vllm/
233
- - tests/metrics
234
214
- tests/v1/tracing
235
215
commands :
236
- - pytest -v -s metrics
237
216
- " pip install \
238
217
'opentelemetry-sdk>=1.26.0' \
239
218
'opentelemetry-api>=1.26.0' \
240
219
'opentelemetry-exporter-otlp>=1.26.0' \
241
220
'opentelemetry-semantic-conventions-ai>=0.4.1'"
242
- - pytest -v -s tracing
221
+ - pytest -v -s v1/tracing
243
222
244
223
# #### fast check tests #####
245
224
# #### 1 GPU test #####
@@ -394,6 +373,7 @@ steps:
394
373
- pytest -v -s compile/test_async_tp.py
395
374
- pytest -v -s compile/test_fusion_all_reduce.py
396
375
- pytest -v -s compile/test_decorator.py
376
+ - pytest -v -s compile/test_noop_elimination.py
397
377
398
378
- label : PyTorch Fullgraph Smoke Test # 15min
399
379
timeout_in_minutes : 30
@@ -548,15 +528,6 @@ steps:
548
528
commands : # LMEval+Transcription WER check
549
529
- pytest -s entrypoints/openai/correctness/
550
530
551
- - label : Encoder Decoder tests # 12min
552
- timeout_in_minutes : 20
553
- mirror_hardwares : [amdexperimental]
554
- source_file_dependencies :
555
- - vllm/
556
- - tests/encoder_decoder
557
- commands :
558
- - pytest -v -s encoder_decoder
559
-
560
531
- label : OpenAI-Compatible Tool Use # 23 min
561
532
timeout_in_minutes : 35
562
533
mirror_hardwares : [amdexperimental]
@@ -817,7 +788,7 @@ steps:
817
788
# Quantization
818
789
- pytest -v -s tests/kernels/quantization/test_cutlass_scaled_mm.py -k 'fp8'
819
790
- pytest -v -s tests/kernels/quantization/test_nvfp4_quant.py
820
- - pytest -v -s tests/kernels/quantization/test_silu_nvfp4_quant_fusion.py
791
+ - pytest -v -s tests/kernels/quantization/test_silu_mul_nvfp4_quant.py
821
792
- pytest -v -s tests/kernels/quantization/test_nvfp4_scaled_mm.py
822
793
- pytest -v -s tests/kernels/quantization/test_flashinfer_scaled_mm.py
823
794
- pytest -v -s tests/kernels/quantization/test_flashinfer_nvfp4_scaled_mm.py
@@ -829,6 +800,20 @@ steps:
829
800
- pytest -v -s tests/kernels/moe/test_flashinfer.py
830
801
- pytest -v -s tests/compile/test_silu_mul_quant_fusion.py
831
802
803
+ - label : GPT-OSS Eval (Blackwell)
804
+ timeout_in_minutes : 60
805
+ working_dir : "/vllm-workspace/"
806
+ gpu : b200
807
+ optional : true # disable while debugging
808
+ source_file_dependencies :
809
+ - tests/evals/gpt_oss
810
+ - vllm/model_executor/models/gpt_oss.py
811
+ - vllm/model_executor/layers/quantization/mxfp4.py
812
+ - vllm/v1/attention/backends/flashinfer.py
813
+ commands :
814
+ - uv pip install --system 'gpt-oss[eval]==0.0.5'
815
+ - pytest -s -v tests/evals/gpt_oss/test_gpqa_correctness.py --model openai/gpt-oss-20b --metric 0.58 --server-args '--tensor-parallel-size 2'
816
+
832
817
# #### 1 GPU test #####
833
818
# #### multi gpus test #####
834
819
@@ -954,7 +939,6 @@ steps:
954
939
commands :
955
940
- pytest -v -s distributed/test_pp_cudagraph.py
956
941
- pytest -v -s distributed/test_pipeline_parallel.py
957
- # - pytest -v -s distributed/test_context_parallel.py # TODO: enable it on Hopper runners or add triton MLA support
958
942
959
943
- label : LoRA TP Test (Distributed) # 17 min
960
944
timeout_in_minutes : 30
@@ -1028,9 +1012,21 @@ steps:
1028
1012
- export VLLM_WORKER_MULTIPROC_METHOD=spawn
1029
1013
- pytest -s -v test_lm_eval_correctness.py --config-list-file=configs/models-large.txt --tp-size=4
1030
1014
1031
- - label : Qwen MoE EP Test # optional
1015
+ # #### H200 test #####
1016
+ - label : Distributed Tests (H200) # optional
1032
1017
gpu : h200
1033
1018
optional : true
1019
+ working_dir : "/vllm-workspace/"
1020
+ num_gpus : 2
1021
+ commands :
1022
+ - pytest -v -s tests/distributed/test_context_parallel.py
1023
+ - CUDA_VISIBLE_DEVICES=1,2 VLLM_ALL2ALL_BACKEND=deepep_high_throughput VLLM_USE_DEEP_GEMM=1 VLLM_LOGGING_LEVEL=DEBUG python3 examples/offline_inference/data_parallel.py --model Qwen/Qwen1.5-MoE-A2.7B --tp-size=1 --dp-size=2 --max-model-len 2048
1024
+
1025
+ # #### B200 test #####
1026
+ - label : Distributed Tests (B200) # optional
1027
+ gpu : b200
1028
+ optional : true
1029
+ working_dir : "/vllm-workspace/"
1034
1030
num_gpus : 2
1035
1031
commands :
1036
- - CUDA_VISIBLE_DEVICES=1,2 VLLM_ALL2ALL_BACKEND=deepep_high_throughput VLLM_USE_DEEP_GEMM=1 VLLM_LOGGING_LEVEL=DEBUG python3 /vllm-workspace/examples/offline_inference/data_parallel.py --model Qwen/Qwen1.5-MoE-A2.7B --tp-size=1 --dp-size=2 --max-model-len 2048
1032
+ - pytest -v -s tests/distributed/test_context_parallel.py
0 commit comments