From fc843ed14ff14b6d04f3c5f545636bfd2765d641 Mon Sep 17 00:00:00 2001 From: rohan-uiuc Date: Thu, 30 Oct 2025 17:33:49 -0500 Subject: [PATCH 1/8] Add support to download models automatically if --model specified in vllm args --- vec_inf/client/_slurm_script_generator.py | 22 ++++++++++++++++++---- 1 file changed, 18 insertions(+), 4 deletions(-) diff --git a/vec_inf/client/_slurm_script_generator.py b/vec_inf/client/_slurm_script_generator.py index a5ede3c1..e1f32c50 100644 --- a/vec_inf/client/_slurm_script_generator.py +++ b/vec_inf/client/_slurm_script_generator.py @@ -137,6 +137,13 @@ def _generate_launch_cmd(self) -> str: Server launch command. """ launcher_script = ["\n"] + + # Check if --model is specified in vllm_args to use HuggingFace model name + model_path = self.model_weights_path + vllm_args_copy = self.params["vllm_args"].copy() + if "--model" in vllm_args_copy: + model_path = vllm_args_copy.pop("--model") + if self.use_container: launcher_script.append( SLURM_SCRIPT_TEMPLATE["container_command"].format( @@ -151,12 +158,12 @@ def _generate_launch_cmd(self) -> str: ) launcher_script.append( "\n".join(SLURM_SCRIPT_TEMPLATE["launch_cmd"]).format( - model_weights_path=self.model_weights_path, + model_weights_path=model_path, model_name=self.params["model_name"], ) ) - for arg, value in self.params["vllm_args"].items(): + for arg, value in vllm_args_copy.items(): if isinstance(value, bool): launcher_script.append(f" {arg} \\") else: @@ -256,6 +263,12 @@ def _generate_model_launch_script(self, model_name: str) -> Path: model_name=model_name, ) ) + # Check if --model is specified in vllm_args to use HuggingFace model name + model_path = model_params["model_weights_path"] + vllm_args_copy = model_params["vllm_args"].copy() + if "--model" in vllm_args_copy: + model_path = vllm_args_copy.pop("--model") + if self.use_container: script_content.append( BATCH_MODEL_LAUNCH_SCRIPT_TEMPLATE["container_command"].format( @@ -265,11 +278,12 @@ def _generate_model_launch_script(self, model_name: str) -> Path: ) script_content.append( "\n".join(BATCH_MODEL_LAUNCH_SCRIPT_TEMPLATE["launch_cmd"]).format( - model_weights_path=model_params["model_weights_path"], + model_weights_path=model_path, model_name=model_name, ) ) - for arg, value in model_params["vllm_args"].items(): + + for arg, value in vllm_args_copy.items(): if isinstance(value, bool): script_content.append(f" {arg} \\") else: From 5f790ffcce8dadc3c9edf79724eb378e6ee77d97 Mon Sep 17 00:00:00 2001 From: rohan-uiuc Date: Thu, 30 Oct 2025 18:21:34 -0500 Subject: [PATCH 2/8] create model dir if it doesn't exist --- vec_inf/client/_slurm_script_generator.py | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/vec_inf/client/_slurm_script_generator.py b/vec_inf/client/_slurm_script_generator.py index e1f32c50..e52c94ba 100644 --- a/vec_inf/client/_slurm_script_generator.py +++ b/vec_inf/client/_slurm_script_generator.py @@ -38,9 +38,11 @@ def __init__(self, params: dict[str, Any]): self.additional_binds = self.params.get("bind", "") if self.additional_binds: self.additional_binds = f" --bind {self.additional_binds}" - self.model_weights_path = str( - Path(self.params["model_weights_parent_dir"], self.params["model_name"]) + model_weights_path = Path( + self.params["model_weights_parent_dir"], self.params["model_name"] ) + model_weights_path.mkdir(parents=True, exist_ok=True) + self.model_weights_path = str(model_weights_path) env_dict: dict[str, str] = self.params.get("env", {}) # Create string of environment variables self.env_str = "" @@ -210,11 +212,13 @@ def __init__(self, params: dict[str, Any]): self.params["models"][model_name]["additional_binds"] = ( f" --bind {self.params['models'][model_name]['bind']}" ) + model_weights_path = Path( + self.params["models"][model_name]["model_weights_parent_dir"], + model_name, + ) + model_weights_path.mkdir(parents=True, exist_ok=True) self.params["models"][model_name]["model_weights_path"] = str( - Path( - self.params["models"][model_name]["model_weights_parent_dir"], - model_name, - ) + model_weights_path ) def _write_to_log_dir(self, script_content: list[str], script_name: str) -> Path: From 0f22bec693f56dba6b15c2887934d5be0fb320d9 Mon Sep 17 00:00:00 2001 From: rohan-uiuc Date: Wed, 12 Nov 2025 16:40:16 -0600 Subject: [PATCH 3/8] Check model weights existence before binding; use HF model name if missing --- .../client/test_slurm_script_generator.py | 31 +++++++++-- vec_inf/client/_slurm_script_generator.py | 52 +++++++++++++------ vec_inf/client/_slurm_templates.py | 8 +-- 3 files changed, 69 insertions(+), 22 deletions(-) diff --git a/tests/vec_inf/client/test_slurm_script_generator.py b/tests/vec_inf/client/test_slurm_script_generator.py index 0b55f59b..bad302e2 100644 --- a/tests/vec_inf/client/test_slurm_script_generator.py +++ b/tests/vec_inf/client/test_slurm_script_generator.py @@ -12,6 +12,15 @@ ) +@pytest.fixture(autouse=True) +def patch_model_weights_exists(monkeypatch): + """Ensure model weights directory existence checks default to True.""" + + monkeypatch.setattr( + "vec_inf.client._slurm_script_generator.Path.exists", lambda self: True + ) + + class TestSlurmScriptGenerator: """Tests for SlurmScriptGenerator class.""" @@ -164,9 +173,8 @@ def test_generate_server_setup_singularity(self, singularity_params): setup = generator._generate_server_setup() assert "ray stop" in setup - assert ( - "module load " in setup - ) # Remove module name since it's inconsistent between clusters + # Note: module_load_cmd may be empty in some configs, so we don't assert it + # The container setup should still work without it def test_generate_launch_cmd_venv(self, basic_params): """Test launch command generation with virtual environment.""" @@ -190,6 +198,23 @@ def test_generate_launch_cmd_singularity(self, singularity_params): assert "--bind /scratch:/scratch,/data:/data" in launch_cmd assert "source" not in launch_cmd + def test_generate_launch_cmd_singularity_no_local_weights( + self, singularity_params, monkeypatch + ): + """Test container launch when model weights directory is missing.""" + + monkeypatch.setattr( + "vec_inf.client._slurm_script_generator.Path.exists", + lambda self: False, + ) + + generator = SlurmScriptGenerator(singularity_params) + launch_cmd = generator._generate_launch_cmd() + + assert "exec --nv" in launch_cmd + assert "--bind /path/to/model_weights/test-model" not in launch_cmd + assert "vllm serve test-model" in launch_cmd + def test_generate_launch_cmd_boolean_args(self, basic_params): """Test launch command with boolean vLLM arguments.""" params = basic_params.copy() diff --git a/vec_inf/client/_slurm_script_generator.py b/vec_inf/client/_slurm_script_generator.py index e52c94ba..215526eb 100644 --- a/vec_inf/client/_slurm_script_generator.py +++ b/vec_inf/client/_slurm_script_generator.py @@ -41,8 +41,14 @@ def __init__(self, params: dict[str, Any]): model_weights_path = Path( self.params["model_weights_parent_dir"], self.params["model_name"] ) - model_weights_path.mkdir(parents=True, exist_ok=True) + self.model_weights_exists = model_weights_path.exists() self.model_weights_path = str(model_weights_path) + self.model_source = ( + self.model_weights_path if self.model_weights_exists else self.params["model_name"] + ) + self.model_bind_option = ( + f" --bind {self.model_weights_path}" if self.model_weights_exists else "" + ) env_dict: dict[str, str] = self.params.get("env", {}) # Create string of environment variables self.env_str = "" @@ -53,6 +59,14 @@ def __init__(self, params: dict[str, Any]): self.env_str += "," self.env_str += key + "=" + val + # # Ensure CUDA_VISIBLE_DEVICES is passed through to the container + # if self.use_container and "CUDA_VISIBLE_DEVICES" not in env_dict: + # if len(self.env_str) == 0: + # self.env_str = "--env " + # else: + # self.env_str += "," + # self.env_str += "CUDA_VISIBLE_DEVICES=$CUDA_VISIBLE_DEVICES" + def _generate_script_content(self) -> str: """Generate the complete Slurm script content. @@ -109,7 +123,7 @@ def _generate_server_setup(self) -> str: server_setup_str = server_setup_str.replace( "CONTAINER_PLACEHOLDER", SLURM_SCRIPT_TEMPLATE["container_command"].format( - model_weights_path=self.model_weights_path, + model_bind_option=self.model_bind_option, additional_binds=self.additional_binds, env_str=self.env_str, ), @@ -140,16 +154,15 @@ def _generate_launch_cmd(self) -> str: """ launcher_script = ["\n"] - # Check if --model is specified in vllm_args to use HuggingFace model name - model_path = self.model_weights_path vllm_args_copy = self.params["vllm_args"].copy() + model_source = self.model_source if "--model" in vllm_args_copy: - model_path = vllm_args_copy.pop("--model") + model_source = vllm_args_copy.pop("--model") if self.use_container: launcher_script.append( SLURM_SCRIPT_TEMPLATE["container_command"].format( - model_weights_path=self.model_weights_path, + model_bind_option=self.model_bind_option, additional_binds=self.additional_binds, env_str=self.env_str, ) @@ -160,7 +173,7 @@ def _generate_launch_cmd(self) -> str: ) launcher_script.append( "\n".join(SLURM_SCRIPT_TEMPLATE["launch_cmd"]).format( - model_weights_path=model_path, + model_source=model_source, model_name=self.params["model_name"], ) ) @@ -216,9 +229,19 @@ def __init__(self, params: dict[str, Any]): self.params["models"][model_name]["model_weights_parent_dir"], model_name, ) - model_weights_path.mkdir(parents=True, exist_ok=True) - self.params["models"][model_name]["model_weights_path"] = str( - model_weights_path + model_weights_exists = model_weights_path.exists() + model_weights_path_str = str(model_weights_path) + self.params["models"][model_name]["model_weights_path"] = ( + model_weights_path_str + ) + self.params["models"][model_name]["model_weights_exists"] = ( + model_weights_exists + ) + self.params["models"][model_name]["model_bind_option"] = ( + f" --bind {model_weights_path_str}" if model_weights_exists else "" + ) + self.params["models"][model_name]["model_source"] = ( + model_weights_path_str if model_weights_exists else model_name ) def _write_to_log_dir(self, script_content: list[str], script_name: str) -> Path: @@ -267,22 +290,21 @@ def _generate_model_launch_script(self, model_name: str) -> Path: model_name=model_name, ) ) - # Check if --model is specified in vllm_args to use HuggingFace model name - model_path = model_params["model_weights_path"] vllm_args_copy = model_params["vllm_args"].copy() + model_source = model_params.get("model_source", model_params["model_weights_path"]) if "--model" in vllm_args_copy: - model_path = vllm_args_copy.pop("--model") + model_source = vllm_args_copy.pop("--model") if self.use_container: script_content.append( BATCH_MODEL_LAUNCH_SCRIPT_TEMPLATE["container_command"].format( - model_weights_path=model_params["model_weights_path"], + model_bind_option=model_params.get("model_bind_option", ""), additional_binds=model_params["additional_binds"], ) ) script_content.append( "\n".join(BATCH_MODEL_LAUNCH_SCRIPT_TEMPLATE["launch_cmd"]).format( - model_weights_path=model_path, + model_source=model_source, model_name=model_name, ) ) diff --git a/vec_inf/client/_slurm_templates.py b/vec_inf/client/_slurm_templates.py index 209534d8..47dc6527 100644 --- a/vec_inf/client/_slurm_templates.py +++ b/vec_inf/client/_slurm_templates.py @@ -99,7 +99,7 @@ class SlurmScriptTemplate(TypedDict): "env_vars": [ f"export {CONTAINER_MODULE_NAME}_BINDPATH=${CONTAINER_MODULE_NAME}_BINDPATH,$(echo /dev/infiniband* | sed -e 's/ /,/g')" ], - "container_command": f"{CONTAINER_MODULE_NAME} exec --nv {{env_str}} --bind {{model_weights_path}}{{additional_binds}} --containall {IMAGE_PATH} \\", + "container_command": f"{CONTAINER_MODULE_NAME} exec --nv {{env_str}}{{model_bind_option}}{{additional_binds}} --containall {IMAGE_PATH} \\", "activate_venv": "source {venv}/bin/activate", "server_setup": { "single_node": [ @@ -147,7 +147,7 @@ class SlurmScriptTemplate(TypedDict): ' && mv temp.json "$json_path"', ], "launch_cmd": [ - "vllm serve {model_weights_path} \\", + "vllm serve {model_source} \\", " --served-model-name {model_name} \\", ' --host "0.0.0.0" \\', " --port $vllm_port_number \\", @@ -238,9 +238,9 @@ class BatchModelLaunchScriptTemplate(TypedDict): ' "$json_path" > temp_{model_name}.json \\', ' && mv temp_{model_name}.json "$json_path"\n', ], - "container_command": f"{CONTAINER_MODULE_NAME} exec --nv --bind {{model_weights_path}}{{additional_binds}} --containall {IMAGE_PATH} \\", + "container_command": f"{CONTAINER_MODULE_NAME} exec --nv{{model_bind_option}}{{additional_binds}} --containall {IMAGE_PATH} \\", "launch_cmd": [ - "vllm serve {model_weights_path} \\", + "vllm serve {model_source} \\", " --served-model-name {model_name} \\", ' --host "0.0.0.0" \\', " --port $vllm_port_number \\", From 9f2fdd21b7e22b1e557e6f1ee63cd7581a45dc9a Mon Sep 17 00:00:00 2001 From: rohan-uiuc Date: Wed, 12 Nov 2025 16:43:37 -0600 Subject: [PATCH 4/8] Remove commented code --- vec_inf/client/_slurm_script_generator.py | 8 -------- 1 file changed, 8 deletions(-) diff --git a/vec_inf/client/_slurm_script_generator.py b/vec_inf/client/_slurm_script_generator.py index 215526eb..4c1b0e5d 100644 --- a/vec_inf/client/_slurm_script_generator.py +++ b/vec_inf/client/_slurm_script_generator.py @@ -59,14 +59,6 @@ def __init__(self, params: dict[str, Any]): self.env_str += "," self.env_str += key + "=" + val - # # Ensure CUDA_VISIBLE_DEVICES is passed through to the container - # if self.use_container and "CUDA_VISIBLE_DEVICES" not in env_dict: - # if len(self.env_str) == 0: - # self.env_str = "--env " - # else: - # self.env_str += "," - # self.env_str += "CUDA_VISIBLE_DEVICES=$CUDA_VISIBLE_DEVICES" - def _generate_script_content(self) -> str: """Generate the complete Slurm script content. From 38011beecad2e5eb5e458a660373c960d3802704 Mon Sep 17 00:00:00 2001 From: rohan-uiuc Date: Wed, 12 Nov 2025 16:47:57 -0600 Subject: [PATCH 5/8] Apply code formatting fixes from pre-commit --- tests/vec_inf/client/test_slurm_script_generator.py | 2 -- vec_inf/client/_slurm_script_generator.py | 10 +++++++--- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/tests/vec_inf/client/test_slurm_script_generator.py b/tests/vec_inf/client/test_slurm_script_generator.py index bad302e2..0abd80bb 100644 --- a/tests/vec_inf/client/test_slurm_script_generator.py +++ b/tests/vec_inf/client/test_slurm_script_generator.py @@ -15,7 +15,6 @@ @pytest.fixture(autouse=True) def patch_model_weights_exists(monkeypatch): """Ensure model weights directory existence checks default to True.""" - monkeypatch.setattr( "vec_inf.client._slurm_script_generator.Path.exists", lambda self: True ) @@ -202,7 +201,6 @@ def test_generate_launch_cmd_singularity_no_local_weights( self, singularity_params, monkeypatch ): """Test container launch when model weights directory is missing.""" - monkeypatch.setattr( "vec_inf.client._slurm_script_generator.Path.exists", lambda self: False, diff --git a/vec_inf/client/_slurm_script_generator.py b/vec_inf/client/_slurm_script_generator.py index 4c1b0e5d..364dd6b8 100644 --- a/vec_inf/client/_slurm_script_generator.py +++ b/vec_inf/client/_slurm_script_generator.py @@ -44,7 +44,9 @@ def __init__(self, params: dict[str, Any]): self.model_weights_exists = model_weights_path.exists() self.model_weights_path = str(model_weights_path) self.model_source = ( - self.model_weights_path if self.model_weights_exists else self.params["model_name"] + self.model_weights_path + if self.model_weights_exists + else self.params["model_name"] ) self.model_bind_option = ( f" --bind {self.model_weights_path}" if self.model_weights_exists else "" @@ -283,7 +285,9 @@ def _generate_model_launch_script(self, model_name: str) -> Path: ) ) vllm_args_copy = model_params["vllm_args"].copy() - model_source = model_params.get("model_source", model_params["model_weights_path"]) + model_source = model_params.get( + "model_source", model_params["model_weights_path"] + ) if "--model" in vllm_args_copy: model_source = vllm_args_copy.pop("--model") @@ -300,7 +304,7 @@ def _generate_model_launch_script(self, model_name: str) -> Path: model_name=model_name, ) ) - + for arg, value in vllm_args_copy.items(): if isinstance(value, bool): script_content.append(f" {arg} \\") From 4de35635e7d8f9c2343372dc29ad59da983de832 Mon Sep 17 00:00:00 2001 From: rohan-uiuc Date: Wed, 12 Nov 2025 16:57:21 -0600 Subject: [PATCH 6/8] revert unnecessary test change --- tests/vec_inf/client/test_slurm_script_generator.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tests/vec_inf/client/test_slurm_script_generator.py b/tests/vec_inf/client/test_slurm_script_generator.py index 0abd80bb..62b36140 100644 --- a/tests/vec_inf/client/test_slurm_script_generator.py +++ b/tests/vec_inf/client/test_slurm_script_generator.py @@ -172,8 +172,9 @@ def test_generate_server_setup_singularity(self, singularity_params): setup = generator._generate_server_setup() assert "ray stop" in setup - # Note: module_load_cmd may be empty in some configs, so we don't assert it - # The container setup should still work without it + assert ( + "module load " in setup + ) # Remove module name since it's inconsistent between clusters def test_generate_launch_cmd_venv(self, basic_params): """Test launch command generation with virtual environment.""" From 8b6a2119da676c0d077af288f72f6dc199e03af1 Mon Sep 17 00:00:00 2001 From: rohan-uiuc Date: Wed, 12 Nov 2025 17:05:28 -0600 Subject: [PATCH 7/8] Apply formatting fixes from pre-commit --- vec_inf/client/_slurm_script_generator.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/vec_inf/client/_slurm_script_generator.py b/vec_inf/client/_slurm_script_generator.py index 0f78fb18..01b786ea 100644 --- a/vec_inf/client/_slurm_script_generator.py +++ b/vec_inf/client/_slurm_script_generator.py @@ -121,7 +121,9 @@ def _generate_server_setup(self) -> str: server_script.append("\n".join(SLURM_SCRIPT_TEMPLATE["container_setup"])) server_script.append( SLURM_SCRIPT_TEMPLATE["bind_path"].format( - model_weights_path=self.model_weights_path if self.model_weights_exists else "", + model_weights_path=self.model_weights_path + if self.model_weights_exists + else "", additional_binds=self.additional_binds, ) ) @@ -289,7 +291,9 @@ def _generate_model_launch_script(self, model_name: str) -> Path: script_content.append(BATCH_MODEL_LAUNCH_SCRIPT_TEMPLATE["container_setup"]) script_content.append( BATCH_MODEL_LAUNCH_SCRIPT_TEMPLATE["bind_path"].format( - model_weights_path=model_params["model_weights_path"] if model_params.get("model_weights_exists", True) else "", + model_weights_path=model_params["model_weights_path"] + if model_params.get("model_weights_exists", True) + else "", additional_binds=model_params["additional_binds"], ) ) From c68cb3553a4c7abdff595e8fef08395f66076256 Mon Sep 17 00:00:00 2001 From: rohan-uiuc Date: Wed, 12 Nov 2025 17:21:10 -0600 Subject: [PATCH 8/8] Add tests for model weights existence coverage --- .../client/test_slurm_script_generator.py | 33 +++++++++++++++++++ 1 file changed, 33 insertions(+) diff --git a/tests/vec_inf/client/test_slurm_script_generator.py b/tests/vec_inf/client/test_slurm_script_generator.py index b962f0f9..a81a962d 100644 --- a/tests/vec_inf/client/test_slurm_script_generator.py +++ b/tests/vec_inf/client/test_slurm_script_generator.py @@ -176,6 +176,21 @@ def test_generate_server_setup_singularity(self, singularity_params): "module load " in setup ) # Remove module name since it's inconsistent between clusters + def test_generate_server_setup_singularity_no_weights( + self, singularity_params, monkeypatch + ): + """Test server setup when model weights don't exist.""" + monkeypatch.setattr( + "vec_inf.client._slurm_script_generator.Path.exists", + lambda self: False, + ) + + generator = SlurmScriptGenerator(singularity_params) + setup = generator._generate_server_setup() + + assert "ray stop" in setup + assert "/path/to/model_weights/test-model" not in setup + def test_generate_launch_cmd_venv(self, basic_params): """Test launch command generation with virtual environment.""" generator = SlurmScriptGenerator(basic_params) @@ -415,6 +430,24 @@ def test_generate_model_launch_script_singularity( mock_touch.assert_called_once() mock_write_text.assert_called_once() + @patch("pathlib.Path.touch") + @patch("pathlib.Path.write_text") + def test_generate_model_launch_script_singularity_no_weights( + self, mock_write_text, mock_touch, batch_singularity_params, monkeypatch + ): + """Test batch model launch script when model weights don't exist.""" + monkeypatch.setattr( + "vec_inf.client._slurm_script_generator.Path.exists", + lambda self: False, + ) + + generator = BatchSlurmScriptGenerator(batch_singularity_params) + script_path = generator._generate_model_launch_script("model1") + + assert script_path.name == "launch_model1.sh" + call_args = mock_write_text.call_args[0][0] + assert "/path/to/model_weights/model1" not in call_args + @patch("vec_inf.client._slurm_script_generator.datetime") @patch("pathlib.Path.touch") @patch("pathlib.Path.write_text")