2 changes: 2 additions & 0 deletions vllm/docker/Dockerfile
@@ -100,6 +100,8 @@ CMD ["bash", "-c", "source /root/.bashrc && exec bash"]
# ======== OpenAI Serving Stage ========
FROM vllm-base AS vllm-openai

COPY ./examples/offline_inference.py /llm/

ARG http_proxy
ARG https_proxy

35 changes: 35 additions & 0 deletions vllm/examples/offline_inference.py
@@ -0,0 +1,35 @@
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project

from vllm import LLM, SamplingParams

# Sample prompts.
prompts = [
"Hello, my name is",
"The president of the United States is",
"The capital of France is",
"The future of AI is",
]
# Create a sampling params object.
sampling_params = SamplingParams(temperature=0.8, top_p=0.95)


def main():
    # Create an LLM.
    llm = LLM(model="MODEL_PATH")
    # Generate texts from the prompts.
    # The output is a list of RequestOutput objects
    # that contain the prompt, generated text, and other information.
    outputs = llm.generate(prompts, sampling_params)
    # Print the outputs.
    print("\nGenerated Outputs:\n" + "-" * 60)
    for output in outputs:
        prompt = output.prompt
        generated_text = output.outputs[0].text
        print(f"Prompt: {prompt!r}")
        print(f"Output: {generated_text!r}")
        print("-" * 60)


if __name__ == "__main__":
main()
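
As a usage note, the MODEL_PATH string above is a placeholder that has to be replaced with an actual model name or local path before the copied script is run inside the image. A minimal sketch of one way to avoid editing the file, assuming a hypothetical VLLM_MODEL_PATH environment variable (not part of vLLM or of this PR), could look like:

# A minimal sketch, not part of this PR: read the model location from a
# hypothetical VLLM_MODEL_PATH environment variable instead of editing the file.
import os

from vllm import LLM, SamplingParams


def main():
    # Falls back to the original placeholder when the variable is not set.
    model_path = os.environ.get("VLLM_MODEL_PATH", "MODEL_PATH")
    llm = LLM(model=model_path)
    outputs = llm.generate(
        ["Hello, my name is"],
        SamplingParams(temperature=0.8, top_p=0.95),
    )
    for output in outputs:
        print(f"Prompt: {output.prompt!r}")
        print(f"Output: {output.outputs[0].text!r}")


if __name__ == "__main__":
    main()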