In [1]:
from typing import Any
import ollama
from dotenv import load_dotenv
from pydantic import BaseModel, Field
from pydantic_ai import Agent, RunContext
from pydantic_ai.usage import UsageLimits
from pydantic_ai.result import StreamedRunResult
from pydantic_ai.settings import ModelSettings


from pydanticai_ollama.models.ollama import OllamaModel
from pydanticai_ollama.settings.ollama import OllamaModelSettings
from pydanticai_ollama.providers.ollama import OllamaProvider

In [2]:
%load_ext autoreload
%autoreload 2

In [3]:
load_dotenv()

False

In [4]:
async def stream_response(agent: Agent[None, Any], question: str, deps: Any = None, usage_limits: UsageLimits | None = None) -> str:
    """Run ``agent`` on ``question`` in streaming mode, echoing text as it arrives.

    Every text delta yielded by the stream is printed immediately (no trailing
    newline, stdout flushed). ``deps`` and ``usage_limits`` are forwarded
    unchanged to ``agent.run_stream``.

    Returns:
        Whatever ``get_output()`` yields on the streamed result once the text
        stream has been fully consumed.
    """
    stream_cm = agent.run_stream(question, deps=deps, usage_limits=usage_limits)
    async with stream_cm as stream:
        async for chunk in stream.stream_text(delta=True):
            print(chunk, end="", flush=True)

        # Resolve the final output while the stream context is still open.
        output = await stream.get_output()
        return output


async def stream_structured(agent: Agent[Any, Any], question: str, deps: Any = None, usage_limits: UsageLimits | None = None) -> StreamedRunResult[Any, Any]:
    """Run ``agent`` on ``question`` and print every structured streaming update.

    Each item yielded by ``stream_structured(debounce_by=0.01)`` is printed
    between ``---`` markers and collected in a list. When the stream flags an
    item as the last one, the whole collected list is printed after
    ``Final result:``.

    Returns:
        The streamed run result object itself (not its output), so the caller
        can still call ``usage()`` / ``get_output()`` on it.
    """
    seen_messages = []
    async with agent.run_stream(question, deps=deps, usage_limits=usage_limits) as stream:
        async for message, is_last in stream.stream_structured(debounce_by=0.01):
            print(f"---{message}---", end="", flush=True)
            seen_messages.append(message)
            if not is_last:
                continue
            print(f"Final result: {seen_messages}")

        return stream

In [5]:
# Sanity check: query the Ollama server directly through the `ollama` client,
# without going through pydantic-ai.
response = ollama.chat(
    model="qwen3",
    messages=[
        {
            "role": "user", 
            "content": "What's the capital of Hungary?", 
            # "images": ["path/to/image.png"]  # Local path or URL
        }
    ],
    # Thinking is switched off for this call.
    think=False
)

# Print only the assistant's text from the response.
print(response["message"]["content"])

The capital of Hungary is **Budapest**.


In [4]:
response

ChatResponse(model='qwen3', created_at='2025-08-26T13:36:35.719202192Z', done=True, done_reason='stop', total_duration=3506190593, load_duration=2781357772, prompt_eval_count=24, prompt_eval_duration=155528949, eval_count=12, eval_duration=568403219, message=Message(role='assistant', content='The capital of Hungary is **Budapest**.', thinking=None, images=None, tool_name=None, tool_calls=None))

In [5]:
# Wrap the qwen3 model served at localhost:11434 as a pydantic-ai model.
m = OllamaModel('qwen3', provider=OllamaProvider(base_url="http://localhost:11434"))
# temperature=0.0 is supplied as the agent-level model settings.
settings = OllamaModelSettings(temperature=0.0)
agent = Agent(m, instructions='You are a chef.', model_settings=settings)

# Single non-streaming run (top-level await, as allowed in notebook cells).
result = await agent.run('I want a recipe to cook Uruguayan alfajores.')
# Bare last expression so the notebook displays the run result.
result

AgentRunResult(output='\n\n**Uruguayan Alfajores Recipe**  \n*Traditional sweet cookies with a crispy, nutty crust and creamy dulce de leche filling.*  \n\n---\n\n### **Ingredients**  \n**For the dough:**  \n- 200g unsalted butter (softened)  \n- 100g granulated sugar  \n- 200g all-purpose flour  \n- 1 tsp vanilla extract (optional)  \n- 1 pinch salt  \n\n**For the nut coating:**  \n- 100g ground almonds or peanuts (store-bought or homemade)  \n- 50g powdered sugar (or granulated sugar, finely ground)  \n\n**For the filling (dulce de leche):**  \n- 1 cup (240ml) whole milk  \n- 1 cup (200g) granulated sugar  \n- 1/2 tsp vanilla extract  \n- 1/4 tsp salt  \n\n---\n\n### **Instructions**  \n\n#### **1. Make the Dough**  \n1. In a large bowl, cream the butter and sugar until light and fluffy.  \n2. Gradually add the flour, salt, and vanilla extract, mixing until a smooth, soft dough forms.  \n3. Shape the dough into a log (about 20cm long), wrap in plastic wrap, and chill for 30 minutes. 

In [6]:
result.all_messages()

[ModelRequest(parts=[UserPromptPart(content='I want a recipe to cook Uruguayan alfajores.', timestamp=datetime.datetime(2025, 8, 26, 13, 36, 50, 789457, tzinfo=datetime.timezone.utc))], instructions='You are a chef.'),
 ModelResponse(parts=[ThinkingPart(content="\nOkay, the user wants a recipe for Uruguayan alfajores. Let me start by recalling what alfajores are. They're a traditional sweet from Uruguay, right? I think they're similar to cookies or shortbread, but with a specific texture and ingredients. I remember that they usually have a dough made with butter, flour, and sometimes sugar, then rolled in a mixture of ground nuts and sugar. The key is the texture—crispy on the outside and soft inside.\n\nWait, I should confirm the exact ingredients. I think the dough is made with butter, flour, and maybe some sugar. Then they're rolled in a mixture of ground almonds or peanuts and sugar. The filling is usually dulce de leche, which is a sweet caramel sauce made from evaporated milk. Bu

In [7]:
print(result.output)



**Uruguayan Alfajores Recipe**  
*Traditional sweet cookies with a crispy, nutty crust and creamy dulce de leche filling.*  

---

### **Ingredients**  
**For the dough:**  
- 200g unsalted butter (softened)  
- 100g granulated sugar  
- 200g all-purpose flour  
- 1 tsp vanilla extract (optional)  
- 1 pinch salt  

**For the nut coating:**  
- 100g ground almonds or peanuts (store-bought or homemade)  
- 50g powdered sugar (or granulated sugar, finely ground)  

**For the filling (dulce de leche):**  
- 1 cup (240ml) whole milk  
- 1 cup (200g) granulated sugar  
- 1/2 tsp vanilla extract  
- 1/4 tsp salt  

---

### **Instructions**  

#### **1. Make the Dough**  
1. In a large bowl, cream the butter and sugar until light and fluffy.  
2. Gradually add the flour, salt, and vanilla extract, mixing until a smooth, soft dough forms.  
3. Shape the dough into a log (about 20cm long), wrap in plastic wrap, and chill for 30 minutes.  

#### **2. Prepare the Nut Coating**  
1. In a small 

In [8]:
from pydantic_ai import ImageUrl


# Image-input check: this cell switches to the gemma3 model and rebinds the
# shared `m`, `agent` and `result` names.
m = OllamaModel('gemma3', provider=OllamaProvider(base_url="http://localhost:11434"))
agent = Agent(m)

# The prompt is a list mixing a text question with an image referenced by URL.
result = await agent.run(
    [
        'What is the name of this fruit?',
        ImageUrl(url='https://t3.ftcdn.net/jpg/00/85/79/92/360_F_85799278_0BBGV9OAdQDTLnKwAPBCcg1J7QtiieJY.jpg'),
    ]
)
result

AgentRunResult(output="That's a potato! \n\nSpecifically, it looks like a Yukon Gold potato. 😊")

In [5]:
# Shared model instance reused by the agents defined in later cells.
# Settings are attached to the model itself: temperature 0.0 and think=False.
ollama_model = OllamaModel(
    model_name="qwen3:4b-instruct",
    provider=OllamaProvider(base_url="http://localhost:11434"),
    settings=OllamaModelSettings(
        temperature=0.0, 
        # num_predict=30, 
        think=False
    )
)

# Display the object to confirm it was constructed.
ollama_model

<pydanticai_ollama.models.ollama.OllamaModel at 0x772bf89c6d80>

In [6]:
def get_weather(city: str = Field(..., description="The city to get the weather for")) -> str:
    """Gets the weather for a given city."""
    # Only "New York" has a canned answer; any other value gets the fallback text.
    if city != "New York":
        return f"I don't know the weather for {city}."
    return "It's sunny in New York!"

In [7]:
from pydantic_ai.tools import Tool
from pydantic_ai.messages import (
    ModelRequest,
    ModelResponse,
    UserPromptPart,
    SystemPromptPart,
    ToolCallPart,
    ToolReturnPart,
    TextPart,
    ModelMessage,
)
from pydantic_ai.models import ModelRequestParameters

# Wrap the plain function in a Tool so its tool definition can be passed to the model.
get_weather_tool = Tool(get_weather)

# Hand-built message history: a system prompt followed by the user's question,
# each in its own ModelRequest.
messages: list[ModelMessage] = [
    ModelRequest(parts=[SystemPromptPart(content="You are a helpful assistant.")]),
    ModelRequest(parts=[UserPromptPart(content="What's the weather in New York?")]),
]
# Advertise get_weather as the only function tool for this request.
request_params = ModelRequestParameters(
    function_tools=[get_weather_tool.tool_def],
)

# Call the model directly (no Agent involved), reusing the settings stored on the model.
response_1 = await ollama_model.request(messages, ollama_model.settings, request_params)
response_1

ModelResponse(parts=[ToolCallPart(tool_name='get_weather', args={'city': 'New York'}, tool_call_id='pyd_ai_be855d7a7f9a483dbcbf7ea08d01e2e2')], usage=RequestUsage(input_tokens=157, output_tokens=21), model_name='qwen3:4b-instruct', timestamp=datetime.datetime(2025, 8, 27, 10, 52, 11, 508241, tzinfo=datetime.timezone.utc))

In [8]:
# Structured output schema: the agent must answer with a city/country pair.
class CityLocation(BaseModel):
    city: str
    country: str


# Rebinds the shared `agent` name to one that returns a CityLocation.
agent = Agent(ollama_model, output_type=CityLocation)

result = await agent.run("Where were the olympics held in 2012?")
result

AgentRunResult(output=CityLocation(city='London', country='United Kingdom'))

In [9]:
result.usage()

RunUsage(input_tokens=20, output_tokens=18, requests=1)

In [10]:
result.output

CityLocation(city='London', country='United Kingdom')

In [11]:
# Same question as the structured run, but with plain-text output.
agent = Agent(ollama_model, output_type=str)

result = await agent.run("Where were the olympics held in 2012?")
result

AgentRunResult(output='The 2012 Summer Olympics were held in London, United Kingdom. The games took place from July 27 to August 12, 2012, and were officially known as the Games of the XXX Olympiad. London became the first city to host the Summer Olympics three times, having previously hosted in 1908 and 1948. The 2012 Games featured events across 8 different venues in London and surrounding areas, with the main stadium being the Olympic Stadium at the Olympic Park in Stratford, East London. The event was celebrated for its successful organization, sustainability efforts, and vibrant cultural programming. \n\nNote: The Winter Olympics were not held in 2012. The Winter Olympics in 2012 were actually held in 2010 (Vancouver), and the next Winter Olympics were in 2014 (Sochi). So, only the Summer Olympics were held in 2012, and they were in London.')

In [12]:
# Streaming variant of the plain-text run: stream_response prints the text
# deltas as they arrive and returns the final output.
agent = Agent(ollama_model, output_type=str)
result = await stream_response(
    agent, 
    'Where were the olympics held in 2012?', 
    # usage_limits=UsageLimits(response_tokens_limit=270)
)
result

The 2012 Summer Olympics were held in London, United Kingdom. The games took place from July 27 to August 12, 2012, and were officially known as the Games of the XXX Olympiad. London became the first city to host the Summer Olympics three times, having previously hosted in 1908 and 1948. The 2012 Games featured events across 8 different venues in London and surrounding areas, with the main stadium being the Olympic Stadium at the Olympic Park in Stratford, East London. The event was celebrated for its successful organization, sustainability efforts, and vibrant cultural programming. 

Note: The Winter Olympics were not held in 2012. The Winter Olympics in 2012 were actually held in 2010 (Vancouver), and the next Winter Olympics were in 2014 (Sochi). So, only the Summer Olympics were held in 2012, and they were in London.

'The 2012 Summer Olympics were held in London, United Kingdom. The games took place from July 27 to August 12, 2012, and were officially known as the Games of the XXX Olympiad. London became the first city to host the Summer Olympics three times, having previously hosted in 1908 and 1948. The 2012 Games featured events across 8 different venues in London and surrounding areas, with the main stadium being the Olympic Stadium at the Olympic Park in Stratford, East London. The event was celebrated for its successful organization, sustainability efforts, and vibrant cultural programming. \n\nNote: The Winter Olympics were not held in 2012. The Winter Olympics in 2012 were actually held in 2010 (Vancouver), and the next Winter Olympics were in 2014 (Sochi). So, only the Summer Olympics were held in 2012, and they were in London.'

In [20]:
# Second model instance on plain "qwen3"; unlike `ollama_model`, no `think`
# value is passed here (the option is left commented out).
ollama_think_model = OllamaModel(
    model_name="qwen3",
    provider=OllamaProvider(base_url="http://localhost:11434"),
    settings=OllamaModelSettings(
        temperature=0.0, 
        # num_predict=30, 
        # think=True
    )
)

# Disabled alternative kept for reference.
# NOTE(review): OpenAIModel is not imported anywhere in the visible cells — confirm before re-enabling.
# ollama_think_model = OpenAIModel(
#     model_name='qwen3',
#     provider=ollama.OllamaProvider(base_url='http://localhost:11434/v1'),
#     settings=ModelSettings(temperature=0.0)
# )

agent = Agent(ollama_think_model, output_type=str)

# Non-streaming variant, currently disabled.
# result = await agent.run("What is your name?")
# result

# Stream the answer; the text is printed as it arrives and the final output is returned.
result = await stream_response(
    agent, 
    'What is your name?', 
    # usage_limits=UsageLimits(response_tokens_limit=270)
)
result



My name is Qwen, and I'm a large language model developed by Alibaba Cloud. I'm here to help with various tasks and answer questions. How can I assist you today? 😊

"\n\nMy name is Qwen, and I'm a large language model developed by Alibaba Cloud. I'm here to help with various tasks and answer questions. How can I assist you today? 😊"

In [13]:
from pydantic import ValidationError


# NOTE(review): CityLocation is declared with the same fields in an earlier cell;
# this redefinition shadows it.
class CityLocation(BaseModel):
    city: str
    country: str

agent = Agent(ollama_model, output_type=CityLocation)

# stream_structured prints every streamed update and returns the streamed run
# result object (not the validated output).
result = await stream_structured(
    agent, 
    'Where were the olympics held in 2012?', 
    # usage_limits=UsageLimits(response_tokens_limit=270)
)
result

# async def main():
#     user_input = 'Where were the olympics held in 2012?'
#     async with agent.run_stream(user_input) as result:
#         async for message, last in result.stream_structured(debounce_by=0.01):  
#             try:
#                 city = await result.validate_structured_output(  
#                     message,
#                     allow_partial=not last,
#                 )
#             except ValidationError:
#                 continue
#             print(city)

# await main()

---ModelResponse(parts=[ToolCallPart(tool_name='final_result', args={'city': 'London'}, tool_call_id='pyd_ai_d16ad0da03f6423db7fbdf7fbb104487')], usage=RequestUsage(), model_name='qwen3:4b-instruct', timestamp=datetime.datetime(2025, 8, 27, 10, 52, 34, 738701, tzinfo=datetime.timezone.utc), provider_name='ollama')------ModelResponse(parts=[ToolCallPart(tool_name='final_result', args={'city': 'London'}, tool_call_id='pyd_ai_b7b1b6315f9b46f1abaa21bbeb976f11')], usage=RequestUsage(), model_name='qwen3:4b-instruct', timestamp=datetime.datetime(2025, 8, 27, 10, 52, 34, 738701, tzinfo=datetime.timezone.utc), provider_name='ollama')------ModelResponse(parts=[ToolCallPart(tool_name='final_result', args={'city': 'London'}, tool_call_id='pyd_ai_0dbc4cb901da4ba887ef55be389c69c4')], usage=RequestUsage(), model_name='qwen3:4b-instruct', timestamp=datetime.datetime(2025, 8, 27, 10, 52, 34, 738701, tzinfo=datetime.timezone.utc), provider_name='ollama')------ModelResponse(parts=[ToolCallPart(tool_name

StreamedRunResult(_all_messages=[ModelRequest(parts=[UserPromptPart(content='Where were the olympics held in 2012?', timestamp=datetime.datetime(2025, 8, 27, 10, 52, 34, 645053, tzinfo=datetime.timezone.utc))]), ModelResponse(parts=[ToolCallPart(tool_name='final_result', args={'city': 'London', 'country': 'United Kingdom'}, tool_call_id='pyd_ai_ef88dcb39c384d7d95ffc1a1f14a0f02')], usage=RequestUsage(input_tokens=20, output_tokens=18), model_name='qwen3:4b-instruct', timestamp=datetime.datetime(2025, 8, 27, 10, 52, 34, 738701, tzinfo=datetime.timezone.utc), provider_name='ollama'), ModelRequest(parts=[ToolReturnPart(tool_name='final_result', content='Output tool not used - a final result was already processed.', tool_call_id='pyd_ai_ef88dcb39c384d7d95ffc1a1f14a0f02', timestamp=datetime.datetime(2025, 8, 27, 10, 52, 34, 990239, tzinfo=datetime.timezone.utc))])], _new_message_index=0, _stream_response=AgentStream(_raw_stream_response=OllamaStreamedResponse(model_request_parameters=ModelRe

In [14]:
result.usage()

RunUsage(input_tokens=20, output_tokens=18, requests=1)

In [15]:
await result.get_output()

CityLocation(city='London', country='United Kingdom')

In [16]:
# Roulette example: the run's dependency is an int and the agent's output is a bool.
roulette_agent = Agent(  
    ollama_model,
    deps_type=int,
    output_type=bool,
    # The prompt tells the model to decide via the `roulette_wheel` tool.
    system_prompt=(
        'Use the `roulette_wheel` function to see if the '
        'customer has won based on the number they provide.'
    ),
)

@roulette_agent.tool(retries=1)
async def roulette_wheel(ctx: RunContext[int], square: int) -> bool:
    """check if the square is a winner"""
    # Log the call so the notebook output shows which square the model passed in.
    print(f"Checking if {square} is a winner")
    # The run's dependency value is compared against the chosen square.
    is_winner = square == ctx.deps
    return is_winner

success_number = 18

In [17]:
# Run the agent with a bet written out in words; `success_number` is passed as
# the run's dependency, which roulette_wheel compares against.
result = await roulette_agent.run('Put my money on square eighteen', deps=success_number)
print(result.output)

Checking if 18 is a winner
True


In [18]:
result.usage()

RunUsage(input_tokens=237, output_tokens=28, requests=2)

In [19]:
# Same bet through the streaming helper; it returns the streamed run result,
# so usage is printed first and the output is awaited separately.
result = await stream_structured(roulette_agent, 'Put my money on square eighteen', deps=success_number)
print(result.usage())
print(await result.get_output())

Checking if 18 is a winner
---ModelResponse(parts=[ToolCallPart(tool_name='final_result', args={'response': True}, tool_call_id='pyd_ai_e02ae338800e4c708576734022237711')], usage=RequestUsage(), model_name='qwen3:4b-instruct', timestamp=datetime.datetime(2025, 8, 27, 10, 52, 52, 391373, tzinfo=datetime.timezone.utc), provider_name='ollama')------ModelResponse(parts=[ToolCallPart(tool_name='final_result', args={'response': True}, tool_call_id='pyd_ai_33bb33be94bb4acd8f5523790063fc94')], usage=RequestUsage(), model_name='qwen3:4b-instruct', timestamp=datetime.datetime(2025, 8, 27, 10, 52, 52, 391373, tzinfo=datetime.timezone.utc), provider_name='ollama')------ModelResponse(parts=[ToolCallPart(tool_name='final_result', args={'response': True}, tool_call_id='pyd_ai_33bb33be94bb4acd8f5523790063fc94')], usage=RequestUsage(input_tokens=73, output_tokens=7), model_name='qwen3:4b-instruct', timestamp=datetime.datetime(2025, 8, 27, 10, 52, 52, 391373, tzinfo=datetime.timezone.utc), provider_name

In [20]:
# Bet on a square that differs from `success_number` (18).
result = await roulette_agent.run('I bet five is the winner', deps=success_number)
print(result.output)

Checking if 5 is a winner
False


In [21]:
result.usage()

RunUsage(input_tokens=236, output_tokens=26, requests=2)

In [22]:
import random

# Structured output for the dice game.
# NOTE(review): `guessesd_number` looks like a typo for `guessed_number`; it is
# part of the output schema, so renaming it changes what the model must return.
class Result(BaseModel):
    guessesd_number: int = Field(..., description="Guessed number by the user")
    rolled_number: int = Field(..., description="Rolled number")
    result: bool = Field(..., description="Guessed number matches the rolled number")
    player_name: str = Field(..., description="Player's name")

# Dice-game agent: the run's dependency is a str and the output is a Result.
# This rebinds the shared `agent` name used by earlier cells.
agent = Agent(
    ollama_model,  
    deps_type=str,
    output_type=Result,  
    system_prompt=(
        "You're a dice game, you should roll the die and see if the number "
        "you get back matches the user's guess. If so, tell them they're a winner. "
        "Use the player's name in the response."
    ),
)


@agent.tool
def roll_dice(ctx: RunContext[str]) -> str:
    """Roll a six-sided die and return the result."""
    face = random.randint(1, 6)
    # Echo the roll so it can be compared with what the model reports.
    print(face)
    return f"{face}"


@agent.tool
def get_player_name(ctx: RunContext[str]) -> str:
    """Get the player's name."""
    # The name is whatever was supplied as the run's dependency value.
    player_name = ctx.deps
    return player_name

# Play one round; the string passed as `deps` is what get_player_name returns.
dice_result = await agent.run('My guess is 4', deps='Ariel')  
print(dice_result.output)

2
guessesd_number=4 rolled_number=2 result=False player_name='Ariel'


In [23]:
dice_result.usage()

RunUsage(input_tokens=338, output_tokens=65, requests=2)

In [24]:
# Same round through the streaming helper; roll_dice draws a fresh random
# number, so the rolled value can differ from the previous run.
result = await stream_structured(agent, 'My guess is 4', deps='Ariel')
print(result.usage())
print(await result.get_output())

5
---ModelResponse(parts=[ToolCallPart(tool_name='final_result', args={'guessesd_number': 4}, tool_call_id='pyd_ai_8459bed4582146fc9b7e379d9e89213a')], usage=RequestUsage(), model_name='qwen3:4b-instruct', timestamp=datetime.datetime(2025, 8, 27, 10, 53, 1, 133789, tzinfo=datetime.timezone.utc), provider_name='ollama')------ModelResponse(parts=[ToolCallPart(tool_name='final_result', args={'guessesd_number': 4}, tool_call_id='pyd_ai_96132a14062a4d939adabf6948522b50')], usage=RequestUsage(), model_name='qwen3:4b-instruct', timestamp=datetime.datetime(2025, 8, 27, 10, 53, 1, 133789, tzinfo=datetime.timezone.utc), provider_name='ollama')------ModelResponse(parts=[ToolCallPart(tool_name='final_result', args={'guessesd_number': 4}, tool_call_id='pyd_ai_8d8729fca4134879912653808d7437a8')], usage=RequestUsage(), model_name='qwen3:4b-instruct', timestamp=datetime.datetime(2025, 8, 27, 10, 53, 1, 133789, tzinfo=datetime.timezone.utc), provider_name='ollama')------ModelResponse(parts=[ToolCallPar

In [25]:
import asyncio
from pydantic import BaseModel, Field
from pydantic_ai import Agent
from pydanticai_ollama.models.ollama import OllamaModel
from pydanticai_ollama.providers.ollama import OllamaProvider

# Pydantic model describing the arguments of the weather tool.
# `location` is required; `unit` defaults to "celsius".
class GetCurrentWeatherArgs(BaseModel):
    location: str = Field(..., description="The city and state, e.g. San Francisco, CA")
    unit: str = Field(default="celsius", description="The unit of temperature (celsius or fahrenheit) to return")

async def tool_use_example():
    """Demonstrate a tool whose arguments are described by a Pydantic model.

    Builds a fresh agent on 'qwen3:4b-instruct', registers
    ``get_current_weather`` on it, asks about the weather in San Francisco and
    prints the agent's final output. Returns nothing.
    """
    ollama_model = OllamaModel(model_name='qwen3:4b-instruct') # Ensure model supports function calling (e.g., qwen)
    agent = Agent(ollama_model)

    # The tool never reads the run context and this agent declares no
    # deps_type, so it is registered with `tool_plain` rather than taking a
    # RunContext[str] parameter it does not use.
    @agent.tool_plain
    def get_current_weather(args: GetCurrentWeatherArgs) -> str:
        """Get the current weather in a given location."""
        # In a real application, this would call an external weather API
        print(args)
        if "San Francisco" in args.location:
            return f"24 degrees {args.unit} and sunny in San Francisco."
        else:
            return f"28 degrees {args.unit} and cloudy in {args.location}."

    print("Tool use example:")
    result = await agent.run("What's the weather like in San Francisco?")
    print(result.output)


# If the model decides to call the tool, result.tool_calls will contain the call
# if result.tool_calls:
#     for tool_call in result.tool_calls:
#         print(f"Model called tool: {tool_call.tool_name} with args: {tool_call.args}")
#         # Execute the tool and send the result back to the model
#         tool_result = tool_call.call()
#         final_response = await agent.run(tool_result)
#         print(f"Final response after tool execution: {final_response.output}")

In [26]:
await tool_use_example()

Tool use example:
location='San Francisco' unit='celsius'
The current weather in San Francisco is 24 degrees Celsius and sunny! 🌞 That's quite pleasant—perfect for outdoor activities. Just be mindful of the coastal breeze, which can make it feel slightly cooler even if the temperature is warm. 😊


In [27]:
result

StreamedRunResult(_all_messages=[ModelRequest(parts=[SystemPromptPart(content="You're a dice game, you should roll the die and see if the number you get back matches the user's guess. If so, tell them they're a winner. Use the player's name in the response.", timestamp=datetime.datetime(2025, 8, 27, 10, 53, 0, 514828, tzinfo=datetime.timezone.utc)), UserPromptPart(content='My guess is 4', timestamp=datetime.datetime(2025, 8, 27, 10, 53, 0, 514831, tzinfo=datetime.timezone.utc))]), ModelResponse(parts=[ToolCallPart(tool_name='get_player_name', args={}, tool_call_id='pyd_ai_9b4c345482fe4800aae5dcbbd558472f'), ToolCallPart(tool_name='roll_dice', args={}, tool_call_id='pyd_ai_d408956b2a4b4bfeace645f0f584f85f')], usage=RequestUsage(input_tokens=219, output_tokens=33), model_name='qwen3:4b-instruct', timestamp=datetime.datetime(2025, 8, 27, 10, 53, 0, 801981, tzinfo=datetime.timezone.utc), provider_name='ollama'), ModelRequest(parts=[ToolReturnPart(tool_name='get_player_name', content='Ariel