# Using Guidance With the new OpenAI Harmony Response Format

First, we define a guidance grammar in accordance with the cookbook https://cookbook.openai.com/articles/openai-harmony

In [1]:
from guidance import gen, special_token

def analysis():
    return (
        special_token("<|start|>")
        + "assistant"
        + special_token("<|channel|>")
        + "analysis"
        + special_token("<|message|>")
        # We could add a more specific grammar here instead of `gen` if we want control over the analysis / reasoning channel
        + gen()
        + special_token("<|end|>")
    )

def commentary():
    return (
        special_token("<|start|>")
        + "assistant"
        + special_token("<|channel|>")
        + "commentary"
        + special_token("<|message|>")
        # We could add a more specific grammar here instead of `gen` if we want control over the commentary channel
        + gen()
        + special_token("<|end|>")
    )

def final():
    return (
        special_token("<|start|>")
        + "assistant"
        + special_token("<|channel|>")
        + "final"
        + special_token("<|message|>")
        # We could add a more specific grammar here instead of `gen` if we want control over the final channel
        + gen()
        + special_token("<|return|>")
    )

Adding in support for tool calling (just JSON for now):

In [None]:
from typing import Any
from guidance import json

# https://cookbook.openai.com/articles/openai-harmony#receiving-tool-calls
def constrained_tool_call(name: str, schema: dict[str, Any]):
    return (
        special_token("<|start|>")
        + "assistant"
        + special_token("<|channel|>")
        + f"commentary to={name} "
        + special_token("<|constrain|>")
        + "json"
        + special_token("<|message|>")
        # Constrain the message format to match the JSON schema -- we could instead use other custom formats here
        + json(schema=schema)
        + special_token("<|call|>")
    )

Define a "top-level" guidance grammar that we can parametrize with tools

In [4]:
from dataclasses import dataclass
from typing import Callable
from guidance import optional, select

@dataclass
class Tool:
    name: str
    schema: dict[str, Any]
    description: str
    callable: Callable

def harmony_response_format(tools: list[Tool]):
    return (
        # Always include reasoning
        analysis()
        # Optional commentary
        + optional(commentary())
        # Either final output or constrained tool call
        + select([final(), *(constrained_tool_call(tool.name, tool.schema) for tool in tools)])
    )

Set up some tools

In [None]:
def get_weather(location: str) -> dict:
    # Simulated weather data
    weather_data = {
        "location": location,
        "temperature": "22°C",
        "condition": "Sunny",
    }
    return weather_data

weather_tool = Tool(
    name="get_weather",
    schema={
        "type": "object",
        "properties": {
            "location": {"type": "string"},
        },
        "required": ["location"],
        "additionalProperties": False,
    },
    description="Fetch the weather information for a specific location.",
    callable=get_weather
)

tools = [weather_tool]

Prepare our grammar into [llguidance/lark format](https://github.com/guidance-ai/llguidance/blob/main/docs/syntax.md)

In [10]:
guidance_grammar = harmony_response_format(tools)
llguidance_grammar = guidance_grammar.ll_grammar()

# Let's take a look
print(llguidance_grammar)

%llguidance {}

start: <|start|> "assistant" <|channel|> "analysis" <|message|> GEN <|end|> optional select
GEN: /(?s:.*)/
optional: (<|start|> "assistant" <|channel|> "commentary" <|message|> GEN <|end|>)?

select: <|start|> "assistant" <|channel|> "final" <|message|> GEN <|return|>
     | <|start|> "assistant" <|channel|> "commentary to=get_weather " <|constrain|> "json" <|message|> json <|call|>

json: %json {
  "type": "object",
  "properties": {
    "location": {
      "type": "string"
    }
  },
  "required": [
    "location"
  ],
  "x-guidance": {
    "whitespace_flexible": false,
    "whitespace_pattern": null,
    "item_separator": ", ",
    "key_separator": ": ",
    "coerce_one_of": true,
    "lenient": false
  }
}




Set up the conversation using harmony

In [8]:
from openai_harmony import (
    Conversation,
    DeveloperContent,
    Message,
    ReasoningEffort,
    Role,
    SystemContent,
    load_harmony_encoding,
    HarmonyEncodingName
)

enc = load_harmony_encoding(
    HarmonyEncodingName.HARMONY_GPT_OSS
)

convo = Conversation.from_messages(
    [
        Message.from_role_and_content(
            Role.SYSTEM,
            SystemContent.new()
                .with_reasoning_effort(ReasoningEffort.LOW)
        ),
        Message.from_role_and_content(
            Role.DEVELOPER,
            DeveloperContent.new()
                .with_instructions("You are a helpful assistant.")
                .with_function_tools([
                    {"name": tool.name, "description": tool.description, "parameters": tool.schema}
                    for tool in tools
                ])
        ),
        Message.from_role_and_content(
            Role.USER,
            "What's the weather like in Paris?"
        ),
    ]
)

In [9]:
# Prompt tokens for completion
# NOTE: harmony docs suggest using `enc.render_conversation_for_completion`, which includes the next turn role in the prompt.
# We don't need to do this, because we include this in the completion grammar itself!
prompt = enc.render_conversation(
    conversation=convo,
)

# Let's take a look
prompt_text = enc.decode(prompt)
print(prompt_text)

<|start|>system<|message|>You are ChatGPT, a large language model trained by OpenAI.
Knowledge cutoff: 2024-06

Reasoning: low

# Valid channels: analysis, commentary, final. Channel must be included for every message.
Calls to these tools must go to the commentary channel: 'functions'.<|end|><|start|>developer<|message|># Instructions

You are a helpful assistant.

# Tools

## functions

namespace functions {

// Fetch the weather information for a specific location.
type get_weather = (_: {
location: string,
}) => any;

} // namespace functions<|end|><|start|>user<|message|>What's the weather like in Paris?<|end|>


In [None]:
import openai

client = openai.Client(
    base_url="URL_TO_YOUR_DEPLOYMENT"
)

response = client.completions.create(
    model="gpt-oss-120b",
    prompt=prompt,
    max_tokens=1024,
    logprobs=True,
    # We're using sglang, so we need to provide the llguidance grammar like so
    # -- please check the documentation of your favorite inference engine
    extra_body={"ebnf": llguidance_grammar},
)

# Note that we need to get tokens from the logprobs because they're currently 
# improperly formatted in the response's messages
# TODO: deal with lower-level utf-8 issues
tokens = enc.encode(text="".join(response.choices[0].logprobs.tokens), allowed_special="all")
messages = enc.parse_messages_from_completion_tokens(tokens=tokens, role=Role.ASSISTANT)

# Extend the conversation with our new messages
convo.messages += messages

# TODO

In [12]:
# Check if last message was a tool call. If so, call it!
last_message = messages[-1]
if last_message.recipient is not None:
    assert last_message.recipient.startswith("functions.")
    tool_name = last_message.recipient[len("functions."):]
    tool = next((t for t in tools if t.name == tool_name), None)
    assert tool is not None, f"Tool {tool_name} not found in tools list."


NameError: name 'messages' is not defined