In [26]:
from pydantic import BaseModel, Field
import json
import litellm
import requests
import time
from termcolor import colored
import dotenv

from openai_function_calling import FunctionInferrer

In [9]:
dotenv.load_dotenv()

True

In [2]:
vllm_url = "http://localhost:8005"

max_wait_seconds = 120
print("Waiting for vLLM to start")
for sec in range(max_wait_seconds):
    try:
        if requests.get(f"{vllm_url}/health").status_code == 200:
            break
    except Exception:
        pass
    time.sleep(1)
else:
    raise RuntimeError("vLLM server failed to start within the expected time.")
print(f"vLLM started in {sec} seconds")

Waiting for vLLM to start
vLLM started in 0 seconds


In [3]:
def print_response(response):
    message = response.choices[0].message
    if hasattr(message, "reasoning_content"):
        print(f"{colored('Reasoning:', 'red')}\n{message.reasoning_content}\n")
    print(f"{colored('Content:', 'red')}\n{message.content}\n")
    print(f"{colored('Tool Calls:', 'red')}")
    tool_calls = message.tool_calls
    if tool_calls:
        for call in tool_calls:
            print(f"{call.function.name}: {call.function.arguments} (id: {call.id})")
    else:
        print("None")

In [4]:
def strip_keys(d: dict, keys: set[str]) -> dict:
    new = {}
    for k, v in d.items():
        if k in keys:
            continue
        new[k] = strip_keys(v, keys) if isinstance(v, dict) else v
    return new

In [5]:
def make_func_schema(name: str, description: str, params: BaseModel) -> dict:
    param_schema = strip_keys(params.model_json_schema(), {"title"})
    func_schema = {
        "type": "function",
        "function": {
            "name": name,
            "description": description,
            "parameters": {
                "type": "object",
                **param_schema,
                "additionalProperties": False,
            },
        },
        "strict": True,
    }
    return func_schema

In [6]:
class GetWeatherFuncParams(BaseModel):
    # location: str = Field(..., description="Location for which to get the weather")
    locations: list[str] = Field(
        ..., description="List of locations for which to get the current weather"
    )


class ReplyFuncParams(BaseModel):
    msg: str = Field(..., description="The free-form reply message to the user")


get_weather_func_schema = make_func_schema(
    name="get_weather",
    description="Get the current weather for the location(s)",
    params=GetWeatherFuncParams,
)

reply_func_schema = make_func_schema(
    name="reply",
    description="Reply to the user with a free-form message",
    params=ReplyFuncParams,
)

tools = [get_weather_func_schema, reply_func_schema]

print(json.dumps(tools, indent=2))

[
  {
    "type": "function",
    "function": {
      "name": "get_weather",
      "description": "Get the current weather for the location(s)",
      "parameters": {
        "type": "object",
        "properties": {
          "locations": {
            "description": "List of locations for which to get the current weather",
            "items": {
              "type": "string"
            },
            "type": "array"
          }
        },
        "required": [
          "locations"
        ],
        "additionalProperties": false
      }
    },
    "strict": true
  },
  {
    "type": "function",
    "function": {
      "name": "reply",
      "description": "Reply to the user with a free-form message",
      "parameters": {
        "type": "object",
        "properties": {
          "msg": {
            "description": "The free-form reply message to the user",
            "type": "string"
          }
        },
        "required": [
          "msg"
        ],
        "additionalProp

In [7]:
conversation = [
    {
        "role": "system",
        "content": "You are a helpful assistant. Don't overthink your answers. Reason concisely and minimally.",
    },
    {
        "role": "user",
        "content": "What's the weather like in 10 cities in the world? Pick any cities you like.",
    },
]

In [24]:
tools = [
    {
        "type": "function",
        "function": {
            "name": "get_weather",
            "description": "Get the current weather for the location(s)",
            "parameters": {
                "type": "object",
                "properties": {
                    "locations": {
                        # "description": "List of locations for which to get the current weather",
                        "items": {"type": "string"},
                        "type": "array",
                    }
                },
                "required": ["locations"],
                "additionalProperties": False,
            },
        },
        "strict": True,
    },
]

In [25]:
# litellm.drop_params = True  # Drop params incompatible with the model

model = "openai/gpt-5-mini"
# model="openai/Qwen3-30B-A3B-Thinking-2507-FP8"
# model = "openai/Qwen3-4B-Instruct-2507"

response = litellm.completion(
    model=model,
    api_base=f"{vllm_url}/v1" if "gpt" not in model else None,
    messages=conversation,
    tools=tools,
    tool_choice="required",
)
print_response(response)

[31mContent:[0m
None

[31mTool Calls:[0m
get_weather: {"locations":["New York, USA","London, UK","Tokyo, Japan","Sydney, Australia","Cairo, Egypt","Rio de Janeiro, Brazil","Moscow, Russia","Mumbai, India","Cape Town, South Africa","Toronto, Canada"]} (id: call_F5zHsfLfnGS44VMZS1Pr57mb)


In [51]:
args = json.loads(response.choices[0].message.tool_calls[0].function.arguments)

In [None]:
args["locations"]

['New York, USA',
 'London, UK',
 'Tokyo, Japan',
 'Sydney, Australia',
 'Cairo, Egypt',
 'Rio de Janeiro, Brazil',
 'Moscow, Russia',
 'Mumbai, India',
 'Cape Town, South Africa',
 'Toronto, Canada']

In [43]:
def get_weather(locations: list[str], use_cache: bool = True) -> list[float]:
    """
    Get the current weather for the given locations in Celsius. Returns a list of float.
    Args:
        locations (list[str]): A list of location names.
        use_cache (bool, optional): Whether to use cached results. Defaults to True.
    """
    return [25.4] * len(locations)  # Dummy implementation


schema = FunctionInferrer.infer_from_function_reference(get_weather)
print(json.dumps(schema.to_json_schema(), indent=2))

{
  "name": "get_weather",
  "description": "Get the current weather for the given locations in Celsius. Returns a list of float.",
  "parameters": {
    "type": "object",
    "properties": {
      "locations": {
        "type": "array",
        "description": "A list of location names.",
        "items": {
          "type": "string"
        }
      },
      "use_cache": {
        "type": "boolean",
        "description": "Whether to use cached results. Defaults to True."
      }
    },
    "required": [
      "locations"
    ]
  }
}
