VectorInstitute · jwilles · Apr 10, 2025 · Mar 2, 2025 · Mar 2, 2025 · Mar 11, 2025
diff --git a/.github/workflows/code_checks.yml b/.github/workflows/code_checks.yml
@@ -36,7 +36,7 @@ jobs:
           version: "0.5.21"
           enable-cache: true
       - name: "Set up Python"
-        uses: actions/setup-python@v5.4.0
+        uses: actions/setup-python@v5.5.0
         with:
           python-version-file: ".python-version"
       - name: Install the project

diff --git a/.github/workflows/docs_build.yml b/.github/workflows/docs_build.yml
@@ -33,7 +33,7 @@ jobs:
           enable-cache: true
 
       - name: "Set up Python"
-        uses: actions/setup-python@8039c45ed9a312fba91f3399cd0605ba2ebfe93c
+        uses: actions/setup-python@8d9ed9ac5c53483de85588cdf95a591a75ab9f55
         with:
           python-version-file: ".python-version"
 

diff --git a/.github/workflows/docs_deploy.yml b/.github/workflows/docs_deploy.yml
@@ -38,7 +38,7 @@ jobs:
           enable-cache: true
 
       - name: "Set up Python"
-        uses: actions/setup-python@8039c45ed9a312fba91f3399cd0605ba2ebfe93c
+        uses: actions/setup-python@8d9ed9ac5c53483de85588cdf95a591a75ab9f55
         with:
           python-version-file: ".python-version"
 

diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml
@@ -21,7 +21,7 @@ jobs:
           version: "0.6.6"
           enable-cache: true
 
-      - uses: actions/setup-python@v5.4.0
+      - uses: actions/setup-python@v5.5.0
         with:
           python-version: '3.10'
 

diff --git a/.github/workflows/unit_tests.yml b/.github/workflows/unit_tests.yml
@@ -53,7 +53,7 @@ jobs:
           enable-cache: true
 
       - name: "Set up Python ${{ matrix.python-version }}"
-        uses: actions/setup-python@v5.4.0
+        uses: actions/setup-python@v5.5.0
         with:
           python-version: ${{ matrix.python-version }}
 

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -16,7 +16,7 @@ repos:
     - id: check-toml
 
   - repo: https://github.com/astral-sh/ruff-pre-commit
-    rev: 'v0.11.0'
+    rev: 'v0.11.2'
     hooks:
     - id: ruff
       args: [--fix, --exit-non-zero-on-fix]

diff --git a/README.md b/README.md
@@ -8,7 +8,7 @@
 [![codecov](https://codecov.io/github/VectorInstitute/vector-inference/branch/develop/graph/badge.svg?token=NI88QSIGAC)](https://app.codecov.io/github/VectorInstitute/vector-inference/tree/develop)
 ![GitHub License](https://img.shields.io/github/license/VectorInstitute/vector-inference)
 
-This repository provides an easy-to-use solution to run inference servers on [Slurm](https://slurm.schedmd.com/overview.html)-managed computing clusters using [vLLM](https://docs.vllm.ai/en/latest/). **All scripts in this repository runs natively on the Vector Institute cluster environment**. To adapt to other environments, update the environment variables in [`cli/_helper.py`](vec_inf/cli/_helper.py), [`cli/_config.py`](vec_inf/cli/_config.py), [`vllm.slurm`](vec_inf/vllm.slurm), [`multinode_vllm.slurm`](vec_inf/multinode_vllm.slurm) and [`models.yaml`](vec_inf/config/models.yaml) accordingly.
+This repository provides an easy-to-use solution to run inference servers on [Slurm](https://slurm.schedmd.com/overview.html)-managed computing clusters using [vLLM](https://docs.vllm.ai/en/latest/). **All scripts in this repository run natively on the Vector Institute cluster environment**. To adapt to other environments, update the environment variables in [`shared/utils.py`](vec_inf/shared/utils.py), [`shared/config.py`](vec_inf/shared/config.py), [`vllm.slurm`](vec_inf/vllm.slurm), [`multinode_vllm.slurm`](vec_inf/multinode_vllm.slurm) and [`models.yaml`](vec_inf/config/models.yaml) accordingly.
 
 ## Installation
 If you are using the Vector cluster environment, and you don't need any customization to the inference server environment, run the following to install package:

diff --git a/docs/source/conf.py b/docs/source/conf.py
@@ -7,7 +7,6 @@
 
 import os
 import sys
-from typing import List
 
 
 sys.path.insert(0, os.path.abspath("../../vec_inf"))
@@ -51,8 +50,16 @@
 copybutton_prompt_text = r">>> |\.\.\. "
 copybutton_prompt_is_regexp = True
 
+apidoc_module_dir = "../../vec_inf"
+apidoc_excluded_paths = ["tests", "cli", "shared"]
+exclude_patterns = ["reference/api/vec_inf.rst"]
+apidoc_output_dir = "reference/api"
+apidoc_separate_modules = True
+apidoc_extra_args = ["-f", "-M", "-T", "--implicit-namespaces"]
+suppress_warnings = ["ref.python"]
+
 intersphinx_mapping = {
-    "python": ("https://docs.python.org/3.9/", None),
+    "python": ("https://docs.python.org/3.10/", None),
 }
 
 # Add any paths that contain templates here, relative to this directory.
@@ -61,7 +68,7 @@
 # List of patterns, relative to source directory, that match files and
 # directories to ignore when looking for source files.
 # This pattern also affects html_static_path and html_extra_path.
-exclude_patterns: List[str] = []
+exclude_patterns = ["reference/api/vec_inf.rst"]
 
 # -- Options for Markdown files ----------------------------------------------
 #

diff --git a/docs/source/index.md b/docs/source/index.md
@@ -8,10 +8,11 @@ hide-toc: true
 :hidden:
 
 user_guide
+reference/api/index
 
 ```
 
-This repository provides an easy-to-use solution to run inference servers on [Slurm](https://slurm.schedmd.com/overview.html)-managed computing clusters using [vLLM](https://docs.vllm.ai/en/latest/). **All scripts in this repository runs natively on the Vector Institute cluster environment**. To adapt to other environments, update the environment variables in [`cli/_helper.py`](https://github.com/VectorInstitute/vector-inference/blob/main/vec_inf/cli/_helper.py), [`cli/_config.py`](https://github.com/VectorInstitute/vector-inference/blob/main/vec_inf/cli/_config_.py), [`vllm.slurm`](https://github.com/VectorInstitute/vector-inference/blob/main/vec_inf/vllm.slurm), [`multinode_vllm.slurm`](https://github.com/VectorInstitute/vector-inference/blob/main/vec_inf/multinode_vllm.slurm), and model configurations in [`models.yaml`](https://github.com/VectorInstitute/vector-inference/blob/main/vec_inf/config/models.yaml) accordingly.
+This repository provides an easy-to-use solution to run inference servers on [Slurm](https://slurm.schedmd.com/overview.html)-managed computing clusters using [vLLM](https://docs.vllm.ai/en/latest/). **All scripts in this repository run natively on the Vector Institute cluster environment**. To adapt to other environments, update the environment variables in [`shared/utils.py`](https://github.com/VectorInstitute/vector-inference/blob/main/vec_inf/shared/utils.py), [`shared/config.py`](https://github.com/VectorInstitute/vector-inference/blob/main/vec_inf/shared/config.py), [`vllm.slurm`](https://github.com/VectorInstitute/vector-inference/blob/main/vec_inf/vllm.slurm), [`multinode_vllm.slurm`](https://github.com/VectorInstitute/vector-inference/blob/main/vec_inf/multinode_vllm.slurm), and model configurations in [`models.yaml`](https://github.com/VectorInstitute/vector-inference/blob/main/vec_inf/config/models.yaml) accordingly.
 
 ## Installation
 

diff --git a/docs/source/reference/api/index.rst b/docs/source/reference/api/index.rst
@@ -0,0 +1,9 @@
+Python API
+==========
+
+This section documents the Python API for the ``vec_inf`` package.
+
+.. toctree::
+   :maxdepth: 4
+
+   vec_inf.api
diff --git a/docs/source/reference/api/vec_inf.api.client.rst b/docs/source/reference/api/vec_inf.api.client.rst
@@ -0,0 +1,7 @@
+vec\_inf.api.client module
+==========================
+
+.. automodule:: vec_inf.api.client
+   :members:
+   :undoc-members:
+   :show-inheritance:
diff --git a/docs/source/reference/api/vec_inf.api.models.rst b/docs/source/reference/api/vec_inf.api.models.rst
@@ -0,0 +1,7 @@
+vec\_inf.api.models module
+==========================
+
+.. automodule:: vec_inf.api.models
+   :members:
+   :undoc-members:
+   :show-inheritance:
diff --git a/docs/source/reference/api/vec_inf.api.rst b/docs/source/reference/api/vec_inf.api.rst
@@ -0,0 +1,17 @@
+vec\_inf.api package
+====================
+
+.. automodule:: vec_inf.api
+   :members:
+   :undoc-members:
+   :show-inheritance:
+
+Submodules
+----------
+
+.. toctree::
+   :maxdepth: 4
+
+   vec_inf.api.client
+   vec_inf.api.models
+   vec_inf.api.utils
diff --git a/docs/source/reference/api/vec_inf.api.utils.rst b/docs/source/reference/api/vec_inf.api.utils.rst
@@ -0,0 +1,7 @@
+vec\_inf.api.utils module
+=========================
+
+.. automodule:: vec_inf.api.utils
+   :members:
+   :undoc-members:
+   :show-inheritance:
diff --git a/docs/source/reference/api/vec_inf.rst b/docs/source/reference/api/vec_inf.rst
@@ -0,0 +1,15 @@
+vec\_inf package
+================
+
+.. automodule:: vec_inf
+   :members:
+   :undoc-members:
+   :show-inheritance:
+
+Subpackages
+-----------
+
+.. toctree::
+   :maxdepth: 4
+
+   vec_inf.api
diff --git a/docs/source/user_guide.md b/docs/source/user_guide.md
@@ -1,6 +1,6 @@
 # User Guide
 
-## Usage
+## CLI Usage
 
 ### `launch` command
 
@@ -17,7 +17,7 @@ You should see an output like the following:
 
 #### Overrides
 
-Models that are already supported by `vec-inf` would be launched using the [default parameters](vec_inf/config/models.yaml). You can override these values by providing additional parameters. Use `vec-inf launch --help` to see the full list of parameters that can be overriden. For example, if `qos` is to be overriden:
+Models that are already supported by `vec-inf` would be launched using the [default parameters](https://github.com/VectorInstitute/vector-inference/blob/main/vec_inf/config/models.yaml). You can override these values by providing additional parameters. Use `vec-inf launch --help` to see the full list of parameters that can be overridden. For example, if `qos` is to be overridden:
 
 ```bash
 vec-inf launch Meta-Llama-3.1-8B-Instruct --qos <new_qos>
@@ -29,7 +29,7 @@ You can also launch your own custom model as long as the model architecture is [
 * Your model weights directory naming convention should follow `$MODEL_FAMILY-$MODEL_VARIANT` ($MODEL_VARIANT is OPTIONAL).
 * Your model weights directory should contain HuggingFace format weights.
 * You should specify your model configuration by:
-  * Creating a custom configuration file for your model and specify its path via setting the environment variable `VEC_INF_CONFIG`. Check the [default parameters](vec_inf/config/models.yaml) file for the format of the config file. All the parameters for the model should be specified in that config file.
+  * Creating a custom configuration file for your model and specifying its path by setting the environment variable `VEC_INF_CONFIG`. Check the [default parameters](https://github.com/VectorInstitute/vector-inference/blob/main/vec_inf/config/models.yaml) file for the format of the config file. All the parameters for the model should be specified in that config file.
   * Using launch command options to specify your model setup.
 * For other model launch parameters you can reference the default values for similar models using the [`list` command ](#list-command).
 
@@ -179,3 +179,10 @@ If you want to run inference from your local device, you can open a SSH tunnel t
 ssh -L 8081:172.17.8.29:8081 username@v.vectorinstitute.ai -N
 ```
 Where the last number in the URL is the GPU number (gpu029 in this case). The example provided above is for the vector cluster, change the variables accordingly for your environment
+
+## Python API Usage
+
+You can also use the `vec_inf` Python API to launch and manage inference servers.
+
+Check out the [Python API documentation](reference/api/index) for more details. There
+are also Python API usage examples in the [`examples`](https://github.com/VectorInstitute/vector-inference/tree/develop/examples/api) folder.
diff --git a/examples/README.md b/examples/README.md
@@ -7,3 +7,6 @@
   - [`vlm/vision_completions.py`](inference/vlm/vision_completions.py): Python example of sending chat completion requests with image attached to prompt to OpenAI compatible server for vision language models
 - [`logits`](logits): Example for logits generation
   - [`logits.py`](logits/logits.py): Python example of getting logits from hosted model.
+- [`api`](api): Examples for using the Python API
+  - [`basic_usage.py`](api/basic_usage.py): Basic Python example demonstrating the Vector Inference API
+  - [`advanced_usage.py`](api/advanced_usage.py): Advanced Python example with rich UI for the Vector Inference API
diff --git a/examples/api/basic_usage.py b/examples/api/basic_usage.py
@@ -0,0 +1,43 @@
+#!/usr/bin/env python
+"""Basic example of Vector Inference API usage.
+
+This script demonstrates the core features of the Vector Inference API
+for launching and interacting with models.
+"""
+
+from vec_inf.client import VecInfClient
+
+
+# Create the API client
+client = VecInfClient()
+
+# List available models
+print("Listing available models...")
+models = client.list_models()
+print(f"Found {len(models)} models")
+for model in models[:3]:  # Show just the first few
+    print(f"- {model.name} ({model.type})")
+
+# Launch a model (replace with an actual model name from your environment)
+model_name = "Meta-Llama-3.1-8B-Instruct"  # Use an available model from your list
+print(f"\nLaunching {model_name}...")
+response = client.launch_model(model_name)
+job_id = response.slurm_job_id
+print(f"Launched with job ID: {job_id}")
+
+# Wait for the model to be ready
+print("Waiting for model to be ready...")
+status = client.wait_until_ready(job_id)
+print(f"Model is ready at: {status.base_url}")
+
+# Get metrics
+print("\nRetrieving metrics...")
+metrics = client.get_metrics(job_id)
+if isinstance(metrics.metrics, dict):
+    for key, value in metrics.metrics.items():
+        print(f"- {key}: {value}")
+
+# Shutdown when done
+print("\nShutting down model...")
+client.shutdown_model(job_id)
+print("Model shutdown complete")
diff --git a/pyproject.toml b/pyproject.toml
@@ -19,6 +19,7 @@ dev = [
     "codecov>=2.1.13",
     "mypy>=1.15.0",
     "nbqa>=1.9.1",
+    "openai>=1.65.1",
     "pip-audit>=2.8.0",
     "pre-commit>=4.1.0",
     "pytest>=8.3.4",
@@ -59,6 +60,9 @@ vec-inf = "vec_inf.cli._cli:cli"
 requires = ["hatchling"]
 build-backend = "hatchling.build"
 
+[tool.hatch.build.targets.wheel]
+packages = ["vec_inf"]
+
 [tool.mypy]
 ignore_missing_imports = true
 install_types = true

diff --git a/tests/test_imports.py b/tests/test_imports.py
@@ -2,16 +2,28 @@
 
 import unittest
 
+import pytest
+
 
 class TestVecInfImports(unittest.TestCase):
     """Test the imports of the vec_inf package."""
 
-    def test_import_cli_modules(self):
-        """Test the imports of the vec_inf.cli modules."""
+    def test_imports(self):
+        """Test that all modules can be imported."""
         try:
+            # CLI imports
+            import vec_inf.cli
             import vec_inf.cli._cli
-            import vec_inf.cli._config
             import vec_inf.cli._helper
-            import vec_inf.cli._utils  # noqa: F401
+
+            # Client imports
+            import vec_inf.client
+            import vec_inf.client._config
+            import vec_inf.client._exceptions
+            import vec_inf.client._helper
+            import vec_inf.client._models
+            import vec_inf.client._utils
+            import vec_inf.client._vars  # noqa: F401
+
         except ImportError as e:
-            self.fail(f"Import failed: {e}")
+            pytest.fail(f"Import failed: {e}")