In [84]:
from pydantic import BaseModel, Field
import json
from openai import OpenAI
import litellm
import requests
import time
from termcolor import colored

In [85]:
vllm_url = "http://localhost:8000"

max_wait_seconds = 120
print("Waiting for vLLM to start")
for sec in range(max_wait_seconds):
    try:
        if requests.get(f"{vllm_url}/health").status_code == 200:
            break
    except Exception:
        pass
    time.sleep(1)
else:
    raise RuntimeError("vLLM server failed to start within the expected time.")
print(f"vLLM started in {sec} seconds")

Waiting for vLLM to start
vLLM started in 0 seconds


In [86]:
oai_client = OpenAI()

In [87]:
class GetWeatherFuncParams(BaseModel):
    location: str = Field(..., description="Location for which to get the weather")


param_schema = GetWeatherFuncParams.model_json_schema()

# Pretty print
print(json.dumps(param_schema, indent=2))

{
  "properties": {
    "location": {
      "description": "Location for which to get the weather",
      "title": "Location",
      "type": "string"
    }
  },
  "required": [
    "location"
  ],
  "title": "GetWeatherFuncParams",
  "type": "object"
}


In [88]:
func_schema = {
    "type": "function",
    "function": {
        "name": "get_weather",
        "description": "Get the current weather for the given location.",
        "parameters": {
            "type": "object",
            **param_schema,
            "additionalProperties": False,
        },
    },
    "strict": True,
}

# Pretty print
print(json.dumps(func_schema, indent=2))

{
  "type": "function",
  "function": {
    "name": "get_weather",
    "description": "Get the current weather for the given location.",
    "parameters": {
      "type": "object",
      "properties": {
        "location": {
          "description": "Location for which to get the weather",
          "title": "Location",
          "type": "string"
        }
      },
      "required": [
        "location"
      ],
      "title": "GetWeatherFuncParams",
      "additionalProperties": false
    }
  },
  "strict": true
}


In [97]:
conversation = [
    {
        "role": "user",
        "content": "What's the weather like in New York City and Beijing today? "
        "Explain which tool you would use, and call it. The explanation is mandatory.",
    },
]
tools = [func_schema]

In [111]:
def print_response(response):
    message = response.choices[0].message
    if hasattr(message, "reasoning_content"):
        print(f"{colored('Reasoning:', 'red')}\n{message.reasoning_content}\n")
    print(f"{colored('Content:', 'red')}\n{message.content}\n")
    print(f"{colored('Tool Calls:', 'red')}")
    tool_calls = message.tool_calls
    if tool_calls:
        for call in tool_calls:
            print(f"{call.function.name}: {call.function.arguments} (id: {call.id})")
    else:
        print("None")

In [112]:
response = oai_client.chat.completions.create(
    model="gpt-5-mini",
    messages=conversation,
    tools=tools,
    tool_choice="required",
)
print_response(response)

[31mContent:[0m
None

[31mTool Calls:[0m
get_weather: {"location": "New York City, USA"} (id: call_SqaUiFywgfgGwX76UZ4n48Jd)
get_weather: {"location": "Beijing, China"} (id: call_0WrNxoEQd54c7wqTZFO0KULu)


In [113]:
litellm.drop_params = True  # Drop params incompatible with the model

response = litellm.completion(
    # model="openai/gpt-5-mini",
    model="openai/Qwen/Qwen3-30B-A3B-Thinking-2507-FP8",
    api_base=f"{vllm_url}/v1",
    messages=conversation,
    tools=tools,
    tool_choice="none",
    reasoning_effort="low",
)
print_response(response)

[31mReasoning:[0m
Okay, the user is asking for the weather in New York City and Beijing today. Let me check the tools provided. There's only one function called get_weather that takes a location. The problem is, the user wants two locations. The function can only handle one location at a time. So I need to call the function twice, once for each city.

Wait, the instructions say to call the function for each location. The user said "explain which tool you would use and call it." So I should explain that I'll use the get_weather function for each city. But the tool can only be called once per location. So I need to make two separate tool calls.

First, for New York City. Then for Beijing. Each requires a separate tool_call. The explanation must be included. Let me make sure the parameters are correct. The function needs "location" as a required string. So I'll structure two tool calls with each city as the location.

Wait, the user might expect me to mention that the tool can only hand