
Commit

Complete tool call support
Signed-off-by: Xue, Chendi <chendi.xue@intel.com>
xuechendi committed Mar 11, 2024
1 parent fcf254a commit 3a0e574
Showing 13 changed files with 350 additions and 163 deletions.
1 change: 1 addition & 0 deletions MANIFEST.in
@@ -1,2 +1,3 @@
 # with [tools.setuptools] in pyproject.toml, the configs below work in both baremetal and container
 include inference/**/*.yaml
+include inference/**/*.jinja
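These include directives make the YAML configs and the new Jinja templates ship inside the built wheel as well as work from a source checkout. A minimal sketch of reading such a packaged file (the package and helper names here are assumptions, not code from this commit):

from importlib import resources

def read_packaged_file(name: str) -> str:
    # importlib.resources resolves data files whether the package was
    # installed from a wheel or is running from a source tree.
    return resources.files("llm_on_ray.inference").joinpath(name).read_text()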
@@ -81,7 +81,7 @@ def get_current_weather(location, unit):
"location": location,
"temperature": "78",
"unit": unit,
"forecast": ["sunny", "with a chance of meatballs"],
"forecast": ["sunny", "with a chance of rain"],
}
return weather_info

@@ -126,3 +126,4 @@ def _arun(self, location: str, unit: str):
 agent = create_openai_tools_agent(tools=tools, llm=llm, prompt=prompt)
 agent_executor = AgentExecutor(agent=agent, tools=tools, verbose=True)
 agent_executor.invoke({"input": "what is the weather today in Boston?"})
+agent_executor.invoke({"input": "tell me a short joke?"})

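For context, the _arun method in the hunk header above belongs to a custom LangChain tool. A minimal sketch of such a class, under assumed names rather than this file's exact code:

from langchain.tools import BaseTool

class WeatherTool(BaseTool):
    name = "get_current_weather"
    description = "Get the current weather in a given location"

    def _run(self, location: str, unit: str):
        # Synchronous path used by AgentExecutor by default.
        return get_current_weather(location, unit)

    async def _arun(self, location: str, unit: str):
        # Async counterpart; mirrors _run here.
        return get_current_weather(location, unit)

tools = [WeatherTool()]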
This file was deleted.

@@ -29,6 +29,8 @@
action="store_true",
help="Whether to enable streaming response",
)
parser.add_argument("--max_tokens", default=256, help="The maximum numbers of tokens to generate")


args = parser.parse_args()

@@ -52,6 +54,7 @@
     model_name=args.model_name,
     openai_api_key=openai_api_key,
     streaming=args.streaming_response,
+    max_tokens=args.max_tokens,
 )
 
 prompt = PromptTemplate(template="list 3 {things}", input_variables=["things"])
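Since --max_tokens arrives from the command line, type=int matters: argparse would otherwise hand ChatOpenAI a string. A standalone sketch of the coercion behavior, independent of this repo:

import argparse

parser = argparse.ArgumentParser()
parser.add_argument(
    "--max_tokens",
    default=256,
    type=int,  # without type=int, "--max_tokens 512" would arrive as the string "512"
    help="The maximum number of tokens to generate",
)

args = parser.parse_args(["--max_tokens", "512"])
assert args.max_tokens == 512  # coerced to int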
42 changes: 30 additions & 12 deletions examples/inference/api_server_openai/openai_tools_call_query.py
@@ -74,17 +74,35 @@
     }
 ]
 messages = [
-    {"role": "system", "content": "You are a helpful assistant"},
-    {"role": "user", "content": "What's the weather like in Boston today?"},
+    [
+        {"role": "user", "content": "You are a helpful assistant"},
+        {"role": "user", "content": "What's the weather like in Boston today?"},
+    ],
+    [
+        {"role": "user", "content": "You are a helpful assistant"},
+        {"role": "user", "content": "Tell me a short joke?"},
+    ],
 ]
-
-chat_completion = client.chat.completions.create(
-    model=args.model_name,
-    messages=messages,
-    max_tokens=args.max_new_tokens,
-    tools=tools,
-    tool_choice="auto",
-    stream=args.streaming_response,
-)
-
-print(repr(chat_completion.choices[0].message.model_dump()))
+for message in messages:
+    print(f"User: {message[1]['content']}")
+    print("Assistant:", end=" ", flush=True)
+    chat_completion = client.chat.completions.create(
+        model=args.model_name,
+        messages=message,
+        max_tokens=args.max_new_tokens,
+        tools=tools,
+        tool_choice="auto",
+        stream=args.streaming_response,
+    )
+
+    if args.streaming_response:
+        for chunk in chat_completion:
+            content = chunk.choices[0].delta.content
+            if content is not None:
+                print(content, end="", flush=True)
+            tool_calls = chunk.choices[0].delta.tool_calls
+            if tool_calls is not None:
+                print(tool_calls, end="", flush=True)
+        print("")
+    else:
+        print(repr(chat_completion.choices[0].message.model_dump()))
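The script now stops at printing the tool call or streaming it token by token. A natural follow-up, not part of this commit, is to execute the requested function and feed the result back in a role="tool" message. A minimal sketch of that non-streaming round trip, reusing the script's client, tools, args, and one message list, with a stand-in weather helper:

import json

def get_current_weather(location, unit="fahrenheit"):
    # Stand-in for a real lookup; mirrors the example tool's schema.
    return json.dumps({"location": location, "temperature": "78", "unit": unit})

response = client.chat.completions.create(
    model=args.model_name, messages=message, tools=tools, tool_choice="auto"
)
msg = response.choices[0].message
if msg.tool_calls:
    call = msg.tool_calls[0]
    result = get_current_weather(**json.loads(call.function.arguments))
    followup = client.chat.completions.create(
        model=args.model_name,
        messages=message + [msg, {"role": "tool", "tool_call_id": call.id, "content": result}],
    )
    print(followup.choices[0].message.content)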
12 changes: 6 additions & 6 deletions llm_on_ray/inference/api_openai_backend/openai_protocol.py
@@ -164,9 +164,14 @@ def __str__(self):
         return self.role
 
 
+class DeltaEOS(BaseModel):
+    class Config:
+        extra = "forbid"
+
+
 class DeltaContent(BaseModel):
     content: str
-    tool_calls: Optional[List[Dict[str, Any]]] = None
+    tool_calls: Optional[List[ToolCall]] = None
 
     def __str__(self):
         if self.tool_calls:
@@ -175,11 +180,6 @@ def __str__(self):
         return str(self.dict())
 
 
-class DeltaEOS(BaseModel):
-    class Config:
-        extra = "forbid"
-
-
 class DeltaChoices(BaseModel):
     delta: Union[DeltaRole, DeltaContent, DeltaEOS]
     index: int
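A streamed choice's delta progresses from DeltaRole to DeltaContent chunks and ends with an empty DeltaEOS, which is why all three appear in the DeltaChoices union, and the tool_calls field is now typed as ToolCall rather than raw dicts. A minimal sketch of building one content chunk; the ToolCall field names here are assumptions based on the OpenAI wire format, not taken from this diff:

delta = DeltaContent(
    content="",
    tool_calls=[
        ToolCall(
            id="call_0",
            type="function",
            function={
                "name": "get_current_weather",
                "arguments": '{"location": "Boston", "unit": "fahrenheit"}',
            },
        )
    ],
)
print(str(delta))  # the __str__ above surfaces tool_calls when present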
3 changes: 1 addition & 2 deletions llm_on_ray/inference/api_openai_backend/query_client.py
@@ -48,7 +48,6 @@ async def query(self, model: str, prompt: Prompt, request_id: str, streaming_rep
         else:
             raise HTTPException(404, f"Could not find model with id {model}")
 
-        prompt_content = prompt.prompt
         request_config = prompt.parameters
         temperature = request_config.get("temperature", 1.0)
         top_p = request_config.get("top_p", 1.0)
@@ -64,7 +63,7 @@
             async_iterator=deploy_handle.options(stream=True)
             .openai_call.options(stream=True, use_new_handle_api=True)
             .remote(
-                prompt_content,
+                prompt.prompt,
                 gen_config,
                 streaming_response=streaming_reponse,
                 tools=prompt.tools,
31 changes: 17 additions & 14 deletions llm_on_ray/inference/api_openai_backend/router_app.py
@@ -162,13 +162,15 @@ async def _chat_completions_wrapper(
                 finish_reason=None,
             )
         ]
-        yield "data: " + ChatCompletionResponse(
+        chunk = ChatCompletionResponse(
             id=completion_id,
             object="chat.completion.chunk",
             model=body.model,
             choices=choices,
             usage=None,
-        ).json() + "\n\n"
+        )
+        data = chunk.json()
+        yield f"data: {data}\n\n"
 
         all_results = []
         async for results in generator:
@@ -198,13 +200,16 @@
                     finish_reason=None,
                 )
             ]
-            yield "data: " + ChatCompletionResponse(
+            chunk = ChatCompletionResponse(
                 id=completion_id,
                 object="chat.completion.chunk",
                 model=body.model,
                 choices=choices,
                 usage=None,
-            ).json() + "\n\n"
+            )
+            # data = chunk.json(exclude_unset=True, ensure_ascii=False)
+            data = chunk.json()
+            yield f"data: {data}\n\n"
             if had_error:
                 # Return early in case of an error
                 break
@@ -221,13 +226,15 @@
             if all_results
             else None
         )
-        yield "data: " + ChatCompletionResponse(
+        chunk = ChatCompletionResponse(
             id=completion_id,
             object="chat.completion.result",
             model=body.model,
             choices=choices,
             usage=usage,
-        ).json() + "\n\n"
+        )
+        data = chunk.json()
+        yield f"data: {data}\n\n"
         yield "data: [DONE]\n\n"


@@ -333,17 +340,13 @@ async def chat(
     Returns:
         A response object with completions.
     """
-    tools = body.tools
-    tool_choice = body.tool_choice
-    # Doing this to remove them from sampling params
-    body.tools = None
-    body.tool_choice = None
-
     prompt = Prompt(
-        prompt=body.messages, parameters=dict(body), tools=tools, tool_choice=tool_choice
+        prompt=body.messages,
+        parameters=dict(body),
+        tools=body.tools,
+        tool_choice=body.tool_choice,
     )
     request_id = f"chatcmpl-{str(uuid.uuid4().hex)}"
 
     if body.stream:
         return StreamingResponse(
             _chat_completions_wrapper(
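Each yield above emits one server-sent-events frame of the form "data: <json>\n\n", terminated by "data: [DONE]". A minimal client-side sketch of consuming that stream with requests; the endpoint path and model name are assumptions:

import json
import requests

resp = requests.post(
    "http://localhost:8000/v1/chat/completions",  # endpoint path assumed
    json={
        "model": "my-model",  # placeholder model id
        "messages": [{"role": "user", "content": "Hi"}],
        "stream": True,
    },
    stream=True,
)
for line in resp.iter_lines():
    if not line or not line.startswith(b"data: "):
        continue
    payload = line[len(b"data: "):]
    if payload == b"[DONE]":
        break
    chunk = json.loads(payload)
    delta = chunk["choices"][0]["delta"]
    print(delta.get("content", ""), end="", flush=True)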
@@ -0,0 +1,60 @@
{%- set func_call_token = "!function_call:" -%} {#- The special prefix to functions calls, be aware of extra space or new lines ! -#}

{%- if CONTEXT == CALL_TOKEN -%} {#- return only the func_call_token value. Needed by the implementation. No data included -#}
{{- func_call_token -}}
{%- endif -%} {#- CONTEXT == CALL_TOKEN -#}

{%- if CONTEXT == CALLS_NOTIF -%} {#- Format the notification of the function call. Data: tool_calls = ToolCall -#}
{%- for call in tool_calls -%}
{%- if call.function.arguments == None or call.function.arguments|count == 0 -%}
{{- call.id }} was called with no argument
{%- else -%}
{{- call.id }} was called with arguments : {{- call.function.arguments -}}
{%- endif -%}
{%- raw %}
{% endraw -%}
{%- endfor -%}
{%- endif -%} {#- CONTEXT == CALLS_NOTIF -#}

{%- if CONTEXT == TOOL_RESPONSE -%} {#- Format of the response of the function call. Data: message = ChatMessage -#}
{{- message.content -}}
{%- endif -%} {#- CONTEXT == TOOL_RESPONSE -#}

{%- if CONTEXT == FORCE_CALL -%} {#- One tool call defined request. Data: tool = ToolCall -#}
You must call the following function at least one time to answer the question. You may call it multiple times if needed:
{%- if tool.function.parameters == None or tool.function.parameters|count == 0 -%} {#- without parameter #}
{'name': "{{tool.function.name}}", 'description': "{{tool.function.description}}", 'arguments': null},
{%- else -%} {#- with parameters #}
{'name': "{{tool.function.name}}", 'description': "{{tool.function.description}}", 'arguments': { {{tool.function.parameters}} {{ '}}' }},
{%- endif %} {#- tool.function.parameters #}
{%- endif -%} {#- CONTEXT == FORCE_CALL -#}

{%- if CONTEXT == FUNCTIONS_LIST -%} {#- Functions list generation Data: tools_list = List[ToolCall] -#}
{%- raw -%}The following is a list of external functions that may be called to complete certain tasks:
[
{%- endraw -%}
{%- for tool in tools_list -%}
{%- if tool.function.parameters == None or tool.function.parameters|count == 0 -%} {#- without parameter #}
{'name': "{{tool.function.name}}", 'description': "{{tool.function.description}}", 'arguments': null},
{%- else -%} {#- with parameters #}
{'name': "{{tool.function.name}}", 'description': "{{tool.function.description}}", 'arguments': { {{tool.function.parameters}} {{ '}}' }},
{% endif -%} {#- tool.function.parameters #}
{%- endfor -%}
{%- raw %}
]
End of list

* Whenever the user asks you something, you can either respond directly or invoke a function if it is present in the previous list.
* The decision to invoke a function is yours, only invoke a function if it is necessary to answer the user's question
* If you need to call at least one function, your message should contain only a list of function calls and nothing else; the function calls are the response.
{%- endraw %}
{%- endif -%} {#- CONTEXT == FUNCTIONS_LIST -#}

{%- if CONTEXT == FORCE_CALL or CONTEXT == FUNCTIONS_LIST -%}
To call a function, the message must start by "{{func_call_token}}" followed by a json like this:
* With arguments:
{{func_call_token}}{"name": "function_name", "arguments": {"arg1": "value1"}}
* Without arguments:
{{func_call_token}}{"name": "function_name", "arguments": null}
End of functions instructions
{%- endif -%} {#- CONTEXT == FORCE_CALL or CONTEXT == FUNCTIONS_LIST -#}
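This template multiplexes several prompt fragments on a CONTEXT variable compared against mode names. A minimal sketch of driving it with jinja2; the file path and the convention of binding each mode name to itself are assumptions, since the loader code is not part of this diff:

from jinja2 import Environment

MODES = ["CALL_TOKEN", "CALLS_NOTIF", "TOOL_RESPONSE", "FORCE_CALL", "FUNCTIONS_LIST"]

with open("tools_template.jinja") as f:  # hypothetical path
    template = Environment().from_string(f.read())

# Binding each mode variable to its own name makes the equality
# checks like {% if CONTEXT == CALL_TOKEN %} select one fragment.
prefix = template.render(CONTEXT="CALL_TOKEN", **{m: m for m in MODES})
print(prefix)  # -> !function_call: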

