In [1]:
import openai

# Connection settings for the OpenAI-compatible endpoint served on localhost.
# NOTE(review): port 11434 and the placeholder key "ollama" suggest a local
# Ollama server; the SDK only needs a non-empty key here — confirm that the
# server does not actually validate it.
LOCAL_BASE_URL = "http://127.0.0.1:11434/v1"
LOCAL_API_KEY = "ollama"

client = openai.OpenAI(base_url=LOCAL_BASE_URL, api_key=LOCAL_API_KEY)

In [2]:
# System prompt for a hand-rolled Thought / Action / Observation agent loop.
# The textual description of the available tools (here only `get_weather`) is
# assumed to have been appended already, so the prompt is used verbatim.
#
# The string is a plain literal: it is never passed through `str.format()` or
# an f-string, so braces must NOT be doubled. The JSON example below therefore
# uses single braces, which is what the model has to emit for the action blob
# to be valid JSON.

SYSTEM_PROMPT = """Answer the following questions as best you can. You have access to the following tools:

get_weather: Get the current weather in a given location

The way you use the tools is by specifying a json blob.
Specifically, this json should have an `action` key (with the name of the tool to use) and an `action_input` key (with the input to the tool going here).

The only values that should be in the "action" field are:
get_weather: Get the current weather in a given location, args: {"location": {"type": "string"}}
example use :

{
  "action": "get_weather",
  "action_input": {"location": "New York"}
}


ALWAYS use the following format:

Question: the input question you must answer
Thought: you should always think about one action to take. Only one action at a time in this format:
Action:

$JSON_BLOB (inside markdown cell)

Observation: the result of the action. This Observation is unique, complete, and the source of truth.
(this Thought/Action/Observation can repeat N times, you should take several steps when needed. The $JSON_BLOB must be formatted as markdown and only use a SINGLE action at a time.)

You must always end your output with the following format:

Thought: I now know the final answer
Final Answer: the final answer to the original input question

Now begin! Reminder to ALWAYS use the exact characters `Final Answer:` when you provide a definitive answer. """


# Opening conversation: the agent instructions followed by the user's question.
system_turn = {"role": "system", "content": SYSTEM_PROMPT}
user_turn = {"role": "user", "content": "What's the weather in London?"}
messages = [system_turn, user_turn]

# First pass: let the model produce its Thought and Action only.
# Generation is cut at "Observation:" so the model cannot invent a tool result
# before the tool has actually been run.
internal_output = client.chat.completions.create(
    messages=messages,
    model="qwen3:8b",
    stop=["Observation:"],
    max_tokens=150,
)

print("INTERMEDIATE OUTPUT============================================================================================")

intermediate_text = internal_output.choices[0].message.content
print(intermediate_text)

# Stand-in for a real weather tool
def get_weather(location):
    """Return a canned weather report for ``location``.

    No lookup is performed: the same sentence is produced for every input,
    with ``location`` interpolated into it. The returned string ends with a
    space and a newline.
    """
    report_template = "the weather in {location} is sunny with low temperatures. \n"
    return report_template.format(location=location)

# Second pass: hand the tool result back to the model so it can write the
# final answer.
#
# The first request stopped generation at "Observation:", and the stop
# sequence itself is not part of the returned text. The label therefore has to
# be re-added here, otherwise the tool result is glued straight onto the
# Action blob and no longer follows the Thought/Action/Observation format that
# SYSTEM_PROMPT prescribes.
#
# `message.content` is optional in the SDK's response type; fall back to an
# empty string so the concatenation below cannot raise a TypeError.
partial_reply = internal_output.choices[0].message.content or ""

# The tool call is hard-coded for this demo: the dummy tool is always invoked
# for London instead of parsing the Action blob out of `partial_reply`.
observation = get_weather('London')

messages = [
    {"role": "system", "content": SYSTEM_PROMPT},
    # Must be the exact same user turn as in the first request.
    {"role": "user", "content": "What's the weather in London?"},
    # Replay the model's own Thought/Action, then append the labelled result.
    {"role": "assistant", "content": partial_reply.rstrip() + "\nObservation: " + observation},
]

print("FINAL OUTPUT============================================================================================")

# Final completion: no stop sequence this time, the model should run through
# to "Final Answer:".
output = client.chat.completions.create(
    model="qwen3:8b",
    messages=messages,
    stream=False,
    max_tokens=200,
)

print(output.choices[0].message.content)

<think>
Okay, the user is asking for the weather in London. I need to use the get_weather tool. Let me check the tools available. The get_weather tool requires a location parameter. The user mentioned London, so the location is London. I should structure the JSON with the action as get_weather and the action_input containing London. Let me make sure there are no typos. The final answer will be the observation from the tool.
</think>

Thought: I need to use the get_weather tool to find the current weather in London.
Action:

{
  "action": "get_weather",
  "action_input": {
    "location": "London"
  }
}


Thought: I now know the final answer
Final Answer: The weather in London is sunny with low temperatures.
