From 849de275730af9eaf86a93eb131855c018675006 Mon Sep 17 00:00:00 2001
From: Aaron E
Date: Thu, 25 Apr 2024 17:19:03 -0600
Subject: [PATCH] Fixing VLLM integration to handle 0.4.1

The name of a parameter changed, so this util needs to be updated.
Updating requirements.txt to match.
---
 llama3-8b-instruct/bentovllm_openai/utils.py | 5 +++--
 llama3-8b-instruct/requirements.txt          | 8 ++++----
 2 files changed, 7 insertions(+), 6 deletions(-)

diff --git a/llama3-8b-instruct/bentovllm_openai/utils.py b/llama3-8b-instruct/bentovllm_openai/utils.py
index 90c3758..6921cfb 100644
--- a/llama3-8b-instruct/bentovllm_openai/utils.py
+++ b/llama3-8b-instruct/bentovllm_openai/utils.py
@@ -51,7 +51,8 @@ def __init__(
         chat_template=None,
     ):
         super(OpenAIServingChat, self).__init__(
-            engine=engine, served_model=served_model,
+            engine=engine,
+            served_model_names=served_model,
             lora_modules=None,
         )
         self.response_role = response_role
@@ -75,7 +76,7 @@ async def _load_chat_template(self, chat_template):
             return super()._load_chat_template(chat_template)
 
         self.openai_serving_completion = OpenAIServingCompletion(
-            engine=self.engine, served_model=served_model,
+            engine=self.engine, served_model_names=served_model,
         )
         self.chat_template = chat_template
 
diff --git a/llama3-8b-instruct/requirements.txt b/llama3-8b-instruct/requirements.txt
index fce4600..ee85ea2 100644
--- a/llama3-8b-instruct/requirements.txt
+++ b/llama3-8b-instruct/requirements.txt
@@ -1,6 +1,6 @@
 accelerate==0.29.3
-bentoml>=1.2.11
+bentoml>=1.2.12
 packaging==24.0
-torch==2.1.2
-transformers==4.39.3
-vllm==0.4.0.post1
+torch==2.2.1
+transformers==4.40.0
+vllm==0.4.1
\ No newline at end of file
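
Note for reviewers: below is a minimal standalone sketch of the rename this patch adapts to, assuming the vLLM 0.4.1 entrypoint classes named in the diff take the keywords shown here; the MODEL_ID value, the engine setup, and passing the name as a one-element list are illustrative assumptions, not taken from the patch.

from vllm.engine.arg_utils import AsyncEngineArgs
from vllm.engine.async_llm_engine import AsyncLLMEngine
from vllm.entrypoints.openai.serving_chat import OpenAIServingChat

# Illustrative model id; any model the engine can load would do.
MODEL_ID = "meta-llama/Meta-Llama-3-8B-Instruct"

engine = AsyncLLMEngine.from_engine_args(AsyncEngineArgs(model=MODEL_ID))

# Under vllm==0.4.0.post1 this keyword was spelled `served_model=`;
# with vllm==0.4.1 the old spelling raises a TypeError at construction.
chat = OpenAIServingChat(
    engine=engine,
    served_model_names=[MODEL_ID],  # renamed from `served_model`
    response_role="assistant",
    lora_modules=None,
    chat_template=None,
)

Because the keyword must match the installed library exactly, pinning vllm==0.4.1 in requirements.txt alongside the utils.py change keeps the two in lockstep, which is why both files move in one commit.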