In [1]:
from pydantic import BaseModel, Field
import json
import litellm
import requests
import time
from termcolor import colored

In [3]:
vllm_url = "http://localhost:8000"

max_wait_seconds = 120
print("Waiting for vLLM to start")
for sec in range(max_wait_seconds):
    try:
        if requests.get(f"{vllm_url}/health").status_code == 200:
            break
    except Exception:
        pass
    time.sleep(1)
else:
    raise RuntimeError("vLLM server failed to start within the expected time.")
print(f"vLLM started in {sec} seconds")

Waiting for vLLM to start
vLLM started in 0 seconds


In [44]:
def print_response(response):
    message = response.choices[0].message
    if hasattr(message, "reasoning_content"):
        print(f"{colored('Reasoning:', 'red')}\n{message.reasoning_content}\n")
    print(f"{colored('Content:', 'red')}\n{message.content}\n")
    print(f"{colored('Tool Calls:', 'red')}")
    tool_calls = message.tool_calls
    if tool_calls:
        for call in tool_calls:
            print(f"{call.function.name}: {call.function.arguments} (id: {call.id})")
    else:
        print("None")

In [45]:
def strip_keys(d: dict, keys: set[str]) -> dict:
    new = {}
    for k, v in d.items():
        if k in keys:
            continue
        new[k] = strip_keys(v, keys) if isinstance(v, dict) else v
    return new

In [47]:
def make_func_schema(name: str, description: str, params: BaseModel) -> dict:
    param_schema = strip_keys(params.model_json_schema(), {"title"})
    func_schema = {
        "type": "function",
        "function": {
            "name": name,
            "description": description,
            "parameters": {
                "type": "object",
                **param_schema,
                "additionalProperties": False,
            },
        },
        "strict": True,
    }
    return func_schema

In [70]:
class GetWeatherFuncParams(BaseModel):
    # location: str = Field(..., description="Location for which to get the weather")
    locations: list[str] = Field(
        ..., description="List of locations for which to get the current weather"
    )


class ReplyFuncParams(BaseModel):
    msg: str = Field(..., description="The free-form reply message to the user")


get_weather_func_schema = make_func_schema(
    name="get_weather",
    description="Get the current weather for the location(s)",
    params=GetWeatherFuncParams,
)

reply_func_schema = make_func_schema(
    name="reply",
    description="Reply to the user with a free-form message",
    params=ReplyFuncParams,
)

tools = [get_weather_func_schema, reply_func_schema]

print(json.dumps(tools, indent=2))

[
  {
    "type": "function",
    "function": {
      "name": "get_weather",
      "description": "Get the current weather for the location(s)",
      "parameters": {
        "type": "object",
        "properties": {
          "locations": {
            "description": "List of locations for which to get the current weather",
            "items": {
              "type": "string"
            },
            "type": "array"
          }
        },
        "required": [
          "locations"
        ],
        "additionalProperties": false
      }
    },
    "strict": true
  },
  {
    "type": "function",
    "function": {
      "name": "reply",
      "description": "Reply to the user with a free-form message",
      "parameters": {
        "type": "object",
        "properties": {
          "msg": {
            "description": "The free-form reply message to the user",
            "type": "string"
          }
        },
        "required": [
          "msg"
        ],
        "additionalProp

In [82]:
conversation = [
    {
        "role": "system",
        "content": "You are a helpful assistant. Don't overthink your answers. Reason concisely and minimally.",
    },
    {
        "role": "user",
        "content": "What's the weather like in 10 cities in the world? Pick any cities you like.",
    },
]

In [83]:
# litellm.drop_params = True  # Drop params incompatible with the model

response = litellm.completion(
    # model="openai/gpt-5-mini",
    model="openai/Qwen3-30B-A3B-Thinking-2507-FP8",
    api_base=f"{vllm_url}/v1",
    messages=conversation,
    tools=tools,
    tool_choice="required",
)
print_response(response)

[31mReasoning:[0m
Okay, the user is asking for the weather in 10 cities around the world. Let me check the available tools. There's the get_weather function that takes a list of locations. The user says to pick any cities, so I need to choose 10. Let me think of some major cities: New York, London, Tokyo, Paris, Sydney, Mumbai, Rio de Janeiro, Cape Town, Moscow, and Dubai. Wait, I should make sure they're all distinct and well-known. Let me confirm those are correct. New York, London, Tokyo, Paris, Sydney, Mumbai, Rio, Cape Town, Moscow, Dubai. Yep, that's 10. Now, I need to call get_weather with these locations. The function requires an array of strings, so I'll structure the arguments as a list. Let me double-check the parameters. The function's required parameter is "locations" as an array. So the tool call should have "locations" with those 10 cities. I'll make sure to format it correctly in JSON. No need for the reply function yet since the user wants the weather data. Alright, 