diff --git a/vllm/docker/Dockerfile b/vllm/docker/Dockerfile
index 3657ad3..d041c16 100644
--- a/vllm/docker/Dockerfile
+++ b/vllm/docker/Dockerfile
@@ -100,6 +100,8 @@ CMD ["bash", "-c", "source /root/.bashrc && exec bash"]
 
 # ======== OpenAI Serving Stage ========
 FROM vllm-base AS vllm-openai
+COPY ./examples/offline_inference.py /llm/
+
 ARG http_proxy
 ARG https_proxy
 
diff --git a/vllm/examples/offline_inference.py b/vllm/examples/offline_inference.py
new file mode 100644
index 0000000..4e16da4
--- /dev/null
+++ b/vllm/examples/offline_inference.py
@@ -0,0 +1,35 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+
+from vllm import LLM, SamplingParams
+
+# Sample prompts.
+prompts = [
+    "Hello, my name is",
+    "The president of the United States is",
+    "The capital of France is",
+    "The future of AI is",
+]
+# Create a sampling params object.
+sampling_params = SamplingParams(temperature=0.8, top_p=0.95)
+
+
+def main():
+    # Create an LLM.
+    llm = LLM(model="MODEL_PATH")
+    # Generate texts from the prompts.
+    # The output is a list of RequestOutput objects
+    # that contain the prompt, generated text, and other information.
+    outputs = llm.generate(prompts, sampling_params)
+    # Print the outputs.
+    print("\nGenerated Outputs:\n" + "-" * 60)
+    for output in outputs:
+        prompt = output.prompt
+        generated_text = output.outputs[0].text
+        print(f"Prompt: {prompt!r}")
+        print(f"Output: {generated_text!r}")
+        print("-" * 60)
+
+
+if __name__ == "__main__":
+    main()
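
Usage note: the copied script is a minimal offline batch-inference sketch; the "MODEL_PATH" placeholder must be replaced with a real model name or local path before it will run. Assuming the vllm-openai stage is built and tagged as vllm-openai:latest (a hypothetical tag, not defined in this diff), and setting aside any accelerator device flags or entrypoint defined elsewhere in the Dockerfile, it could be exercised roughly like:

    # hypothetical invocation; image tag and lack of device flags are assumptions
    docker run --rm -it --entrypoint python3 vllm-openai:latest /llm/offline_inference.py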