# Lesson 5: Tool calling with Pydantic Models and GenAI (OpenAI from the course)

The goals:
    1. Use Pydantic models to define tool schemas for GenAI's tool calling API
    2. Register your tool with the API using validated schema
    3. Handle tool calls and validate arguments with Pydantic
    4. Integrate LLM-driven workflows with your own python functions and data sources


In [1]:
# Import all the libraries
from pydantic import BaseModel, Field, EmailStr, field_validator
from pydantic_ai import Agent
from typing import Literal, List, Optional
from datetime import date, datetime
import json
from google import genai
import instructor
from dotenv import load_dotenv
# Load environmental variables
load_dotenv(dotenv_path="C:\\Users\\yawen\\Documents\\Learning\\Pydantic\\geminiai.env")
import nest_asyncio
nest_asyncio.apply()


In [2]:
# Define your UserInput model
class UserInput(BaseModel):
    name: str = Field(..., description = "User's name")
    email: str = Field(..., description = "User's email address")
    query: str = Field(..., description = "User's query")
    order_id: Optional[str] = Field(
        None,
        description = "Order ID if available (format: ABC-12345)" 
    )
    # Validate order_id format
    @field_validator("order_id")
    def validate_order_id(cls, order_id):
        import re
        if order_id is None:
            return order_id
        pattern = r"^[A-Z]{3}-\d{5}$"

        if not re.match(pattern, order_id):
            raise ValueError(
                "order_id must be in format ABC-12345"
                "(3 uppercase letters, dash, 5 digits)"
            )

        return order_id
    purchase_date: Optional[datetime] = Field(None)
    # Validate purchase_date format
    @field_validator('purchase_date', mode = 'before')
    def parse_purchase_date(cls, v):
        if isinstance(v, str):
            # Convert 'Z' (UTC) to '+00:00' for compatibility with fromisoformat
            return datetime.fromisoformat(v.replace('Z', '+00:00'))        

In [3]:
# Define CustomerQuery model
class CustomerQuery(UserInput):
    priority: str = Field(
        ..., description = "Priority level: low, medium, high"
    )
    category: Literal[
        'refund_request', 'information_request', 'other'
    ] = Field (..., description = "Query category")
    is_complaint: bool = Field (
        ..., description = "whether this is a complaint"
    )
    tags: list[str] = Field(
        ..., description = "Relevant keywords tags"
    )

# Validate user input and create a CustomerQuery instance

In [4]:
# Define a function to validate user input
def validate_user_input(user_json: str):
    """ Validate user input from a json string and return a UserInput instance if valid  
    """

    try:
        user_input = UserInput.model_validate_json(user_json)
        print("User input validated ...")

        return user_input
    except Exception as e:
        print(f"Unexpected error: {e}")
        return None

In [5]:
# Define a function to call an LLM using pydantic ai to ccreate an instance of CustomerQuery
def create_customer_query(valid_user_json: str):
    customer_query_agent = Agent(
        model="google-gla:gemini-2.5-pro",
        output_type = CustomerQuery,
    )
    response = customer_query_agent.run_sync(valid_user_json)
    print("CustomerQuery generated ...")
    return response.output
    

In [6]:
# Configure the google generaltive AI client
import os
api_key = os.getenv("GOOGLE_API_KEY")


# Try out your validation and query creation with creation with sample input

In [7]:
# Define user input json data
user_input_json = '''{
    "name": "Joe User",
    "email": "joe@example.com",
    "query": "When can I expect delivery of the headphones I ordered?",
    "order_id": "ABC-12345",
    "purchase_date": "2025-01-10"  
} 
'''
# Validate user input and create a customerquery
valid_data = validate_user_input(user_input_json).model_dump_json()
customer_query = create_customer_query(valid_data)
print(type(customer_query))
print(customer_query.model_dump_json(indent=2))

User input validated ...
CustomerQuery generated ...
<class '__main__.CustomerQuery'>
{
  "name": "Joe User",
  "email": "joe@example.com",
  "query": "When can I expect delivery of the headphones I ordered?",
  "order_id": "ABC-12345",
  "purchase_date": "2025-01-10T00:00:00",
  "priority": "low",
  "category": "information_request",
  "is_complaint": false,
  "tags": [
    "delivery",
    "headphones"
  ]
}


In [8]:
print(valid_data)

{"name":"Joe User","email":"joe@example.com","query":"When can I expect delivery of the headphones I ordered?","order_id":"ABC-12345","purchase_date":"2025-01-10T00:00:00"}


# Define tool input models for FAQ lookup and order status

In [9]:
# Define FAQ lookup tool input as a pydantic model
class FAQLookupArgs(BaseModel):
    query: str = Field(..., description = "User's query")
    tags: List[str] = Field(..., description = "Relevant keyword tag")


In [10]:
# Define Check Order Status tool input as a Pydantic model
class CheckOrderStatusArgs(BaseModel):
    order_id: Optional[str] = Field(
        None,
        description = "Order ID if available (format: ABC-12345)" 
    )
    email: str = Field(..., description = "User's email")
    # Validate order_id format
    @field_validator("order_id")
    def validate_order_id(cls, order_id):
        import re
        if order_id is None:
            return order_id
        pattern = r"^[A-Z]{3}-\d{5}$"

        if not re.match(pattern, order_id):
            raise ValueError(
                "order_id must be in format ABC-12345"
                "(3 uppercase letters, dash, 5 digits)"
            )

        return order_id

In [11]:
# Create a fake FAQ database as a list of entries with keywords
faq_db = [
    {
        "question": "How can I reset my password?",
        "answer": "To reset your password, click 'Forgot Password' on the sign-in page and follow the instructions sent to your email.",
        "keywords": ["password", "reset", "account"]
    },
    {
        "question": "How long does shipping take?",
        "answer": "Standard shipping takes 3-5 business days. You can track your order in your account dashboard.",
        "keywords": ["shipping", "delivery", "order", "tracking"]
    },
    {
        "question": "How can I return an item?",
        "answer": "You can return any item within 30 days of purchase. Visit our returns page to start the process.",
        "keywords": ["return", "refund", "exchange"]
    },
    {
        "question": "How can I delete my account?",
        "answer": "To delete your account, go to your account settings tab and select 'delete account'.",
        "keywords": ["delete", "account", "remove"]
    }
]

# Create a fake order database
order_db = {
    "ABC-12345": {
        "status": "shipped", "estimated_delivery": "2025-12-05",
        "purchase_date": "2025-12-01", "email": "joe@example.com"
    },
    "XYZ-23456": {
        "status": "processing", "estimated_delivery": "2025-12-15",
        "purchase_date": "2025-12-10", "email": "sue@example.com"
    },
    "QWE-34567": {
        "status": "delivered", "estimated_delivery": "2025-12-20",
        "purchase_date": "2025-12-18", "email": "bob@example.com"
    }
}

# Implement tool functions for FAQ lookup and order status

In [12]:
# Define your FAQ lookup tool
def lookup_faq_answer(args: FAQLookupArgs) -> str:
    """ Look up an FAQ answer by matching tags and words in query to FAQ entry keywords"""
    query_words = set(word.lower() for word in args.query.split()) 
    tar_set = set(tag.lower() for tag in args.tag)
    best_match = None
    best_score = 0
    for faq in faq_db:
        keywords = set(k.lower() for k in faq["keywords"])
        score = len(keywords & tag_set) + len(keywords & query_words)

        if score > best_score:
            best_score = score
            best_match = faq
    if best_match and best_score > 0:
        return best_match["answer"]
    return "Sorry, I couldn't find a FAQ answer."

In [13]:
# Define check order status
def check_order_status(args: CheckOrderStatusArgs) -> str:
    """Stimulate checking the status of a customer's order by order_id and email """
    order = order_db.get(args.order_id)
    if not order:
        return {
            "order_id": args.order_id,
            "status": "not found",
            "estimated_delivery": None,
            "note": "order_id not found"
        }

    if args.email.lower() != order_db.get("email", "").lower():
        return {
            "order_id": args.order_id,
            "status": order["status"],
            "estimated_delivery": order["estimated_delivery"],
            "note": "order_id found but email mismatch"
        }

    return {
        "order_id": args.order_id,
        "status": order["status"],
        "estimated_delivery": order["estimated_delivery"],
        "note": "order_id and email match"
    }        

# Define tool schemas for GenAI tool calling

In [30]:
# Define tools for your API call 
tool_definitions = [
    {
        "type": "function",
        "function": {
            "name": "lookup_faq_answer",
            "description": "Look up an FAQ answer by matching tags to FAQ entry keywords.",
            "parameters": FAQLookupArgs.model_json_schema()
        }
    },
    {
        "type": "function",
        "function": {
            "name": "check_order_status",
            "description": "Check the status of a customer's order.",
            "parameters": CheckOrderStatusArgs.model_json_schema()
        }
    }
]

In [15]:
from google.genai import types
# Define function declarations for your tools
lookup_faq_function = types.FunctionDeclaration(
    name="lookup_faq_answer",
    description="Look up an FAQ answer by matching tags to FAQ entry keywords.",
    parameters=FAQLookupArgs.model_json_schema()
)

check_order_status_function = types.FunctionDeclaration(
    name="check_order_status",
    description="Check the status of a customer's order.",
    parameters=CheckOrderStatusArgs.model_json_schema()
)

tool = [types.Tool(functionDeclarations=[lookup_faq_function, check_order_status_function])]

In [16]:
# Define final output pydantic models
class OrderDetails(BaseModel):
    status: str
    estimated_delivery: str
    note: str

class SupportTicket(CustomerQuery):
    recommended_next_action: Literal['escalate_to_agent', 'send_faq_response', 
        'send_order_status', 'no_action_needed'] = Field(..., description = "LLM's recommeded next action for support")
    order_details: Optional[OrderDetails] = Field(None, description = "Order details if action is send_order_status")
    faq_response: Optional[str] = Field(None, description = "FAQ response if action is send_faq_response")
    creation_date: datetime = Field(..., description = "Date and time the ticket was created")
    

# Decided on the next support action using Google GenAI tool calling


In [17]:
# Initialize GenAI client
client = genai.Client(api_key=api_key)

def decide_next_action_with_tools(customer_query: CustomerQuery):
    support_ticket_schema = json.dumps(
        SupportTicket.model_json_schema(), indent=2
    )
    system_prompt = f""" 
        You are a helpful customer support agent. Your job is to 
        determine what support action should be taken for the customer, 
        based on the customer query and the expected fields in the 
        SupportTicket schema below. If more information on a particular 
        order_id or FAQ response would be helpful in responding to the 
        user query and can be obtained by calling a tool, call the 
        appropriate tool to get that information. If an order_id is 
        present in the query, always look up the order status to get 
        more information on the order.

        Here is the JSON schema for the SupportTicket model you must 
        use as context for what information is expected:
        {support_ticket_schema}    
    """

    messages = [
        {"role": "model", "content": system_prompt},
        {"role": "user", "content": str(customer_query.model_dump())}
    ]

    # Convert to Google GenAI contents format
    contents = [
       types.Content(
            role=msg["role"],
            parts=[types.Part(text=msg["content"])]
        )
        for msg in messages
    ]


    tool_config = types.ToolConfig(
        function_calling_config=types.FunctionCallingConfig(mode='ANY')
    )

    response = client.models.generate_content(
        model = 'gemini-2.5-pro',
        contents = contents,
        config=types.GenerateContentConfig(
            tools=tool,
            # Enable automatic function calling.
            automatic_function_calling=types.AutomaticFunctionCallingConfig(disable=False), 
            tool_config=tool_config,
            )
        
    )

    message = response.candidates[0].content
    tool_calls = response.candidates[0].content.parts[0].function_call
 
    return message, tool_calls, contents
    #return response

In [18]:
# Call the decide_next_action_with_tools function
message, tool_calls, messages = decide_next_action_with_tools(
    customer_query
)
# Investigate the LLM's outputs before proceeding
print("LLM message:\n", message.model_dump())
print(
    "\nTool calls:\n", 
    tool_calls.model_dump()
)

LLM message:
 {'parts': [{'video_metadata': None, 'thought': None, 'inline_data': None, 'file_data': None, 'thought_signature': b'\n\xfc\x06\x01T\xa8\\\xee\x7f)\xf2R\x10\xd537\xac\xf5\x82t-\xde\x02\xfa:y\x01\xf8Mi\x10\x82\xadx\x9ao\xe3\x96\x0c\x9a\x93\xb5\xb2i\xa4\x88\xc1s\xc7\xd8d=\xcd\xee\xf5\xa6\x0e\xc9\x139@ \xa5\xfb\xf8\xd7Ra\xd0S\xbaCz\x18\x04\x07\xe5fc\xa6\x87\xb5\xc8\xf1<\xe8\xee\xe6c\xb2\x8d\x93(\x01\xdc6\x00\x8d\x1d#q\xa2\xd0V\xb8\xa0\x82r\x16\x07\x82~\xb2V\xb9\xaf\xe4\xee\xc9\x96td\t\xc5\x87|R\xe8:tF\xa1\x8do\x836\xa8\xa4hx\xe4\xee\x8e\x7f\x11e\xeb9D\xd2\x06I!Q^\xc88\xda\xc3\x0cVu\xec\xeb\xff\x08\x88\xfc\xa3a\x9a1\xdbv\xe3\x89\x06\xdb\xe6;\xb5\xeb\xc2\x93\x1a\xea\xff_\x96\x895\xf41\xcc\\\x15\xdb\xfb\r9\xf4K3p\x17?\xea\xc9\xbc%rd\xc0T).\xb9\xdf\xfc\xac\x82\xdc\xd6zpd\xe5\x97\xc2\x13V\x86\x89\xf8I\xf4\x93\xda\xfful\xe8#G>B\xa2\xf5\x97\x9c6Y\xd1\xccq\xc3\xab\xd3\x17\xf8\xa3/5\x82\xc1L\x01!U\xa6l"?#Ce\xd4\tmS\xc2\xe4\xcaDA\x8e\x93\xa6a\x15Z)\x04_\x04\xed\xcbz?p]\xeaW\xa9\x9c\xe1

In [None]:
# Create the Anthropic client with Instructor
anthropic_client = instructor.from_anthropic(
    anthropic.Anthropic()
)

# Define a function to call Anthropic to generate a support ticket
def generate_structured_support_ticket(
    customer_query: CustomerQuery, message, tool_outputs: list
):
    tool_results_str = "\n".join([
        f"Tool: {out['tool_call_id']} Output: {json.dumps(out['output'])}"
        for out in tool_outputs
    ]) if tool_outputs else "No tool calls were made."
    # Concatenate prompt parts into a single string for Anthropic
    prompt = f"""
        You are a support agent. Use all information below to 
        generate a support ticket as a validated Pydantic model.
        Customer query: {customer_query.model_dump_json(indent=2)}
        LLM message: {str(message.content)}
        Tool results: {tool_results_str}
    """
    # Create the message with structured output
    response = anthropic_client.messages.create(
        model="claude-3-7-sonnet-latest",  
        max_tokens=1024,
        messages=[
            {
                "role": "user", 
                "content": prompt
            }
        ],
        response_model=SupportTicket
    )
    
    support_ticket = response
    support_ticket.creation_date = datetime.now()
    return support_ticket

In [110]:
messages

[{'role': 'model',
  'content': ' \n        You are a helpful customer support agent. Your job is to \n        determine what support action should be taken for the customer, \n        based on the customer query and the expected fields in the \n        SupportTicket schema below. If more information on a particular \n        order_id or FAQ response would be helpful in responding to the \n        user query and can be obtained by calling a tool, call the \n        appropriate tool to get that information. If an order_id is \n        present in the query, always look up the order status to get \n        more information on the order.\n\n        Here is the JSON schema for the SupportTicket model you must \n        use as context for what information is expected:\n        {\n  "$defs": {\n    "OrderDetails": {\n      "properties": {\n        "status": {\n          "title": "Status",\n          "type": "string"\n        },\n        "estimated_delivery": {\n          "title": "Estimated D

# Gather tool outputs and prepare for ticket generation

In [19]:
tool_calls.name

'check_order_status'

In [32]:
# Define a function to get tool outputs
def get_tool_outputs(tool_calls):
    tool_outputs = []
    if tool_calls:
        if tool_calls.name == "lookup_faq_answer":
            print("Agent requested a call to the Lookup FAQ tool...")
            args = FAQLookupArgs.model_validate_json(
                json.dumps(tool_calls.args)
            )
            result = lookup_faq_answer(args)
            tool_outputs.append(
                {
                    "tool_call_id": tool_calls.id, "output": result
                }
            )
            print(f"Lookup FAQ tool returned {result}")

        elif tool_calls.name == "check_order_status":
            print("Agent requested a call to Check Order Status tool...")
            args = CheckOrderStatusArgs.model_validate_json(json.dumps(tool_calls.args))
            result = check_order_status(args)
            tool_outputs.append(
                {
                    "tool_call_id": tool_calls.id, "output": result
                }
            )
            print(f"Check Order Status tool returned {result}")

        return tool_outputs

tool_outputs = get_tool_outputs(tool_calls)        

# Print tool outputs for inspection
print("Tool outputs:\n", json.dumps(tool_outputs, indent=2))

Agent requested a call to Check Order Status tool...
Check Order Status tool returned {'order_id': 'ABC-12345', 'status': 'shipped', 'estimated_delivery': '2025-12-05', 'note': 'order_id found but email mismatch'}
Tool outputs:
 [
  {
    "tool_call_id": null,
    "output": {
      "order_id": "ABC-12345",
      "status": "shipped",
      "estimated_delivery": "2025-12-05",
      "note": "order_id found but email mismatch"
    }
  }
]


In [30]:
tool_calls.args
args_json = json.dumps(tool_calls.args)
args_json

'{"order_id": "ABC-12345", "email": "joe@example.com"}'

# Generate a structured support ticket using Google GenAI

In [68]:

def generate_structured_support_ticket(
    customer_query: CustomerQuery, message, tool_outputs: list
):
    tool_results_str = "\n".join([
        f"Tool: {out['tool_call_id']} Output: {json.dumps(out['output'])}"
        for out in tool_outputs
    ]) if tool_outputs else "No tool calls were made."

    llm_message_text = "".join(part.text or "" for part in message.parts)

    prompt = f"""
        You are a support agent. Use all information below to 
        generate a support ticket as a validated Pydantic model.
        Customer query: {customer_query.model_dump_json(indent=2)}
        LLM message: {llm_message_text}
        Tool results: {tool_results_str}
    """

    contents = [
       types.Content(
            role="user",
            parts=[types.Part(text=prompt)]
        )
    ]

    response = client.models.generate_content(
        model='gemini-2.5-pro',
        contents=contents,
        config=types.GenerateContentConfig(
            max_output_tokens=2048,
            response_mime_type='application/json',
            response_schema=SupportTicket,
        )
    )

    # Extract the generated JSON string from response
    generated_json_str = None
    if response.candidates and response.candidates[0].content and response.candidates[0].content.parts:
        generated_json_str = response.candidates[0].content.parts[0].text
    else:
        raise ValueError("No content generated by model.")

    # Parse JSON string into SupportTicket model instance
    support_ticket = SupportTicket.model_validate_json(generated_json_str)

    # Add creation timestamp (adjust field name to your model definition)
    support_ticket.creation_date = datetime.now()

    return support_ticket


# Print final support ticket

In [69]:
# Run the final step of generating a support ticket and print output
support_ticket = generate_structured_support_ticket(
    customer_query, message, tool_outputs
)
print(support_ticket.model_dump_json(indent=2))

{
  "name": "Joe User",
  "email": "joe@example.com",
  "query": "I'm really not happy with this product I bought",
  "order_id": "QWE-34567",
  "purchase_date": null,
  "priority": "high",
  "category": "refund_request",
  "is_complaint": true,
  "tags": [
    "product",
    "complaint"
  ],
  "recommended_next_action": "escalate_to_agent",
  "order_details": {
    "status": "delivered",
    "estimated_delivery": "2025-12-20",
    "note": "order_id found but email mismatch"
  },
  "faq_response": null,
  "creation_date": "2025-08-08T00:32:05.510655"
}


In [66]:
# Define new user input data
user_json = '''
{
    "name": "Joe User",
    "email": "joe@example.com",
    "query": "I'm really not happy with this product I bought",
    "order_id": "QWE-34567",
    "purchase_date": null
}
'''

In [70]:
# Run the entire pipeline
valid_user_json = validate_user_input(user_json).model_dump_json()
customer_query = create_customer_query(valid_user_json)
message, tool_calls, messages = decide_next_action_with_tools(
    customer_query
)
tool_outputs = get_tool_outputs(tool_calls)
support_ticket = generate_structured_support_ticket(
    customer_query, message, tool_outputs
)
print(support_ticket.model_dump_json(indent=2))

User input validated ...
CustomerQuery generated ...
Agent requested a call to Check Order Status tool...
Check Order Status tool returned {'order_id': 'QWE-34567', 'status': 'delivered', 'estimated_delivery': '2025-12-20', 'note': 'order_id found but email mismatch'}
{
  "name": "Joe User",
  "email": "joe@example.com",
  "query": "I'm really not happy with this product I bought",
  "order_id": "QWE-34567",
  "purchase_date": null,
  "priority": "high",
  "category": "other",
  "is_complaint": true,
  "tags": [
    "product",
    "unhappy",
    "complaint"
  ],
  "recommended_next_action": "escalate_to_agent",
  "order_details": null,
  "faq_response": null,
  "creation_date": "2025-08-08T00:32:30.780324"
}
