In [1]:
from dotenv import load_dotenv
import os

# Load variables from the .env file one directory up; override=True lets values
# from the file replace any already present in the process environment.
load_dotenv('../.env', override=True)

# Fail fast before any model call is attempted. A blank value (for example an
# `OPENAI_API_KEY=` line with nothing after it) is treated like a missing key,
# since it would otherwise only surface later as an authentication error.
if not os.getenv('OPENAI_API_KEY'):
    raise ValueError("OPENAI_API_KEY environment variable not set")
print("OPENAI_API_KEY is set")

OPENAI_API_KEY is set


# Testing the common interface of the `ChatOpenAI` and `ChatOllama` models returned by the `llm_factory`
1. Getting basic prompt completions
1. Returning structured output with Pydantic models
1. Using builtin tools
1. Using custom tools

In [2]:
from langchain_openai import ChatOpenAI
from langchain_ollama import ChatOllama
from core.agents.utils.llm_factory import get_chat_model
from langchain_core.messages import SystemMessage, HumanMessage
from pydantic import BaseModel, Field

# Two-message conversation: a persona-setting system prompt followed by one
# factual question. `messages` is reused by the next cell for the second model.
sys_msg = SystemMessage(content="You are a helpful assistant that speaks like Shakespeare.")
human_msg = HumanMessage(content="What is the capital of France?")
messages = [sys_msg, human_msg]

# Plain completion through the factory: no tools bound, no output model.
openai_llm = get_chat_model(model_name="gpt-4.1")
response = openai_llm.invoke(messages)

# Show the full message repr, then its Python type.
print(response, type(response), sep="\n")

Using model 'gpt-4.1' with structured output model 'None'
content='Verily, good sir or madam, the capital of France is Paris, that fair and noble city oft called the City of Light.' additional_kwargs={'refusal': None} response_metadata={'token_usage': {'completion_tokens': 29, 'prompt_tokens': 28, 'total_tokens': 57, 'completion_tokens_details': {'accepted_prediction_tokens': 0, 'audio_tokens': 0, 'reasoning_tokens': 0, 'rejected_prediction_tokens': 0}, 'prompt_tokens_details': {'audio_tokens': 0, 'cached_tokens': 0}}, 'model_name': 'gpt-4.1-2025-04-14', 'system_fingerprint': 'fp_beec22d258', 'id': 'chatcmpl-BWX2knVVtU3F6crcWHPo2PnOrpwnP', 'finish_reason': 'stop', 'logprobs': None} id='run-d1ef7791-5b80-4719-9d38-70a16be899a9-0' usage_metadata={'input_tokens': 28, 'output_tokens': 29, 'total_tokens': 57, 'input_token_details': {'audio': 0, 'cache_read': 0}, 'output_token_details': {'audio': 0, 'reasoning': 0}}
<class 'langchain_core.messages.ai.AIMessage'>


In [3]:
# Send the same `messages` conversation to mistral-nemo through the same
# factory call, so the two replies can be compared side by side.
ollama_llm = get_chat_model(model_name="mistral-nemo")
ollama_response = ollama_llm.invoke(messages)

# Show the full message repr, then its Python type.
print(ollama_response, type(ollama_response), sep="\n")

Using model 'mistral-nemo' with structured output model 'None'
content='Verily, the capital of France is Paris, a city of great renown and beauty, where the Seine doth flow through its heart.' additional_kwargs={} response_metadata={'model': 'mistral-nemo', 'created_at': '2025-05-12T23:40:19.957198Z', 'done': True, 'done_reason': 'stop', 'total_duration': 2167563542, 'load_duration': 1144584583, 'prompt_eval_count': 20, 'prompt_eval_duration': 334111916, 'eval_count': 30, 'eval_duration': 687944917, 'message': Message(role='assistant', content='', images=None, tool_calls=None)} id='run-042e5092-d121-4910-aa3b-2ffbac238432-0' usage_metadata={'input_tokens': 20, 'output_tokens': 30, 'total_tokens': 50}
<class 'langchain_core.messages.ai.AIMessage'>


### Chat completion results: 
Both models respond with an `AIMessage` when no tools are bound and no structured-output model is supplied.

## Test Structured Output

In [4]:
# Schema passed to the chat-model factory as `output_model`: a single field
# holding the list of claim strings extracted from the input text.
# NOTE(review): documented with comments rather than a class docstring because a
# docstring may be included in the schema sent to the model; confirm before adding one.
class ClaimsOutput(BaseModel):
    claims: list[str] = Field(
        description="The list of individual claim strings.",
    )

# Ask the factory for a gpt-4o model that returns ClaimsOutput instances.
openai_structured_llm = get_chat_model(
    model_name="gpt-4o",
    output_model=ClaimsOutput
)

# Decode the prompt explicitly as UTF-8 instead of relying on the platform's
# locale encoding, so the same prompt text is sent on every OS.
# Assumes the prompt file is saved as UTF-8; verify if it was authored elsewhere.
with open('prompts/claim_decomposer_system_prompt.txt', 'r', encoding='utf-8') as f:
    claim_decomposer_system_prompt = f.read()

cd_system_prompt = SystemMessage(
    content=claim_decomposer_system_prompt
)

# Three-sentence sample passage to be decomposed into individual claims.
text = """
The Apollo 11 mission landed humans on the Moon for the first time on July 20, 1969.
Neil Armstrong was the first person to walk on the lunar surface.
The mission was launched by NASA using a Saturn V rocket.
"""
human_msg = HumanMessage(content=text)
cd_response = openai_structured_llm.invoke([cd_system_prompt, human_msg])

# The response object itself, then the parsed `claims` field.
print(type(cd_response))
print(cd_response)

print(type(cd_response.claims))
print(cd_response.claims)

Using model 'gpt-4o' with structured output model '<class '__main__.ClaimsOutput'>'
<class '__main__.ClaimsOutput'>
claims=['The Apollo 11 mission landed humans on the Moon for the first time on July 20, 1969.', 'Neil Armstrong was the first person to walk on the lunar surface.', 'The Apollo 11 mission was launched by NASA using a Saturn V rocket.']
<class 'list'>
['The Apollo 11 mission landed humans on the Moon for the first time on July 20, 1969.', 'Neil Armstrong was the first person to walk on the lunar surface.', 'The Apollo 11 mission was launched by NASA using a Saturn V rocket.']


In [5]:
# Repeat the structured-output request against mistral-nemo, reusing the
# system prompt and human message built in the previous cell.
ollama_structured_llm = get_chat_model(model_name="mistral-nemo", output_model=ClaimsOutput)
ollama_cd_response = ollama_structured_llm.invoke([cd_system_prompt, human_msg])

# Response type, type of the `claims` field, then the claims themselves.
print(
    type(ollama_cd_response),
    type(ollama_cd_response.claims),
    ollama_cd_response.claims,
    sep="\n",
)

Using model 'mistral-nemo' with structured output model '<class '__main__.ClaimsOutput'>'
<class '__main__.ClaimsOutput'>
<class 'list'>
['The Apollo 11 mission landed humans on the Moon for the first time on July 20, 1969.', 'Neil Armstrong was the first person to walk on the lunar surface.', 'The Apollo 11 mission was launched by NASA using a Saturn V rocket.']


### Structured Output Results

Both chat models can accept a Pydantic model to structure their outputs

## Test builtin tools

In [6]:
from pprint import pprint
from core.agents.tools.builtins.web_search import tool_function as web_search
from core.agents.tools.builtins.calculator import tool_function as calculator
from core.agents.tools.builtins.wikipedia_tool import tool_function as wikipedia
# Inspect one builtin tool object before binding it to a model.
print(type(wikipedia), wikipedia, sep="\n")

# Bind all three builtin tools to a gpt-4o chat model.
llm_with_tools = (
    get_chat_model(model_name="gpt-4o")
    .bind_tools([wikipedia, web_search, calculator])
)

# One prompt containing three questions, phrased so that each of the bound
# tools has a question it could answer.
sys_msg_tools = SystemMessage(
    content="You are a helpful research assistant that can use Wikipedia, a calculator, and web search."
)
human_msg = HumanMessage(
    content="What is the capital of France? What is 2 + 2? What is the weather like today in NYC?"
)
response = llm_with_tools.invoke([sys_msg_tools, human_msg])

# Only the requested tool calls are shown; no tool is executed here.
print(type(response))
pprint(response.tool_calls)

<class 'langchain_core.tools.structured.StructuredTool'>
name='wikipedia' description='Query wiki. Use it to get factual information on: \n- historical figures, events, or places\n- scientific concepts\n- common knowledge\n- debunked myths' args_schema=<class 'langchain_core.utils.pydantic.wikipedia'> response_format='content_and_artifact' func=<function tool_function at 0x115ae54e0>
Using model 'gpt-4o' with structured output model 'None'
<class 'langchain_core.messages.ai.AIMessage'>
[{'args': {'query_str': 'capital of France'},
  'id': 'call_ZqF9fHaW4kZJe7DQxEFy5x2y',
  'name': 'wikipedia',
  'type': 'tool_call'},
 {'args': {'expression': '2 + 2'},
  'id': 'call_sLwyDdDgKJVVbuYfm7eH2uX6',
  'name': 'calculator',
  'type': 'tool_call'},
 {'args': {'query': 'weather today in NYC', 'topic': 'news'},
  'id': 'call_7m0m3r49zUjdPzFHF7FC8r7N',
  'name': 'web_search',
  'type': 'tool_call'}]


In [7]:
# Bind the same three builtin tools to mistral-nemo and send the same
# prompt pair used in the previous cell.
ollama_with_tools = (
    get_chat_model(model_name="mistral-nemo")
    .bind_tools([wikipedia, web_search, calculator])
)
ollama_response = ollama_with_tools.invoke([sys_msg_tools, human_msg])

# Only the requested tool calls are shown; no tool is executed here.
print(type(ollama_response))
pprint(ollama_response.tool_calls)

Using model 'mistral-nemo' with structured output model 'None'
<class 'langchain_core.messages.ai.AIMessage'>
[{'args': {'query_str': 'Capital of France'},
  'id': '6891027a-6b71-4b98-b7f6-35ed1c613665',
  'name': 'wikipedia',
  'type': 'tool_call'},
 {'args': {'expression': '2+2'},
  'id': '75b9425d-bf14-491c-afde-6fbba5bc9e60',
  'name': 'calculator',
  'type': 'tool_call'},
 {'args': {'query': "Today's weather in NYC", 'topic': 'general'},
  'id': '56940531-0d0f-44bd-be5a-941414d010d2',
  'name': 'web_search',
  'type': 'tool_call'}]


### Builtin tool conclusion:
Both ChatOpenAI and ChatOllama can have the builtin tools bound to them and produce correct tool calls

## Custom Tools

In [8]:
from core.agents.tools.tool_registry import create_tool

# Keyword arguments describing a custom HTTP tool; unpacked into create_tool()
# further down this cell.
pokemon_kwargs = {
    "name": "pokeapi",
    "method": "GET",
    "headers": {"Accept": "application/json"},
    # `{name}` in the URL is the placeholder for the `name` parameter declared
    # under `param_mapping` below.
    "url_template": "https://pokeapi.co/api/v2/pokemon/{name}",
    "docstring": """Get information about a Pokémon from the PokeAPI.
    Args:
        name (str): The name of the Pokémon to query, ALWAYS LOWERCASED.
    Returns:
        list: A list containing the Pokémon's abilities.
    """,
    # Two key/index paths, each ending at an ability name (entries 0 and 1 of
    # `abilities`).
    "target_fields": [
        ["abilities", 0, "ability", "name"],
        ["abilities", 1, "ability", "name"],
    ],
    "param_mapping": {
        "name": {
            "type": "str",
            "for": "url_params",
        },
    },
}

# Build the custom tool from the declarative spec and inspect the result.
pokemon_tool = create_tool(**pokemon_kwargs)
print(type(pokemon_tool))
print(pokemon_tool)

# Bind only the custom tool to a gpt-4o chat model.
pokemon_openai_llm = get_chat_model(model_name="gpt-4o").bind_tools([pokemon_tool])

# Decode the prompt explicitly as UTF-8 instead of relying on the platform's
# locale encoding, so the same prompt text is sent on every OS.
# Assumes the prompt file is saved as UTF-8; verify if it was authored elsewhere.
with open('prompts/research_agent_system_prompt.txt', 'r', encoding='utf-8') as f:
    research_agent_system_prompt = f.read()
sys_msg = SystemMessage(content=research_agent_system_prompt)
human_msg = HumanMessage(
    content="Pikachu has electric abilities"
)

# The reply is inspected in the next cell.
response = pokemon_openai_llm.invoke([sys_msg, human_msg])

<class 'langchain_core.tools.structured.StructuredTool'>
name='pokeapi' description="Get information about a Pokémon from the PokeAPI.\n    Args:\n        name (str): The name of the Pokémon to query, ALWAYS LOWERCASED.\n    Returns:\n        list: A list containing the Pokémon's abilities." args_schema={'type': 'object', 'properties': {'name': {'description': 'name parameter', 'type': 'string'}}, 'required': ['name']} response_format='content_and_artifact' func=<function create_tool.<locals>.tool_function at 0x1160167a0>
Using model 'gpt-4o' with structured output model 'None'


In [9]:
# Inspect the reply produced by the previous cell: its type, the full message
# repr, and the tool calls the model requested.
print(type(response), response, response.tool_calls, sep="\n")

<class 'langchain_core.messages.ai.AIMessage'>
content='' additional_kwargs={'tool_calls': [{'id': 'call_gAWcwiSATgd3q9Cl8aePLnLy', 'function': {'arguments': '{"name":"pikachu"}', 'name': 'pokeapi'}, 'type': 'function'}], 'refusal': None} response_metadata={'token_usage': {'completion_tokens': 17, 'prompt_tokens': 832, 'total_tokens': 849, 'completion_tokens_details': {'accepted_prediction_tokens': 0, 'audio_tokens': 0, 'reasoning_tokens': 0, 'rejected_prediction_tokens': 0}, 'prompt_tokens_details': {'audio_tokens': 0, 'cached_tokens': 0}}, 'model_name': 'gpt-4o-2024-08-06', 'system_fingerprint': 'fp_f5bdcc3276', 'id': 'chatcmpl-BWX30OF5r86ZgBZrtIAttjKChafv6', 'finish_reason': 'tool_calls', 'logprobs': None} id='run-b53cecf6-d6d4-475b-b982-77aede21aa3f-0' tool_calls=[{'name': 'pokeapi', 'args': {'name': 'pikachu'}, 'id': 'call_gAWcwiSATgd3q9Cl8aePLnLy', 'type': 'tool_call'}] usage_metadata={'input_tokens': 832, 'output_tokens': 17, 'total_tokens': 849, 'input_token_details': {'audio':

In [10]:
# Bind the same custom tool to mistral-nemo and send the same prompt pair
# used for the gpt-4o run.
pokemon_ollama_llm = (
    get_chat_model(model_name="mistral-nemo")
    .bind_tools([pokemon_tool])
)
ollama_response = pokemon_ollama_llm.invoke([sys_msg, human_msg])

# Type, full message repr, then the tool calls the model requested.
print(type(ollama_response), ollama_response, ollama_response.tool_calls, sep="\n")

Using model 'mistral-nemo' with structured output model 'None'
<class 'langchain_core.messages.ai.AIMessage'>
content='' additional_kwargs={} response_metadata={'model': 'mistral-nemo', 'created_at': '2025-05-12T23:40:35.137783Z', 'done': True, 'done_reason': 'stop', 'total_duration': 2892786458, 'load_duration': 28102292, 'prompt_eval_count': 894, 'prompt_eval_duration': 2250531208, 'eval_count': 24, 'eval_duration': 609932959, 'message': Message(role='assistant', content='', images=None, tool_calls=None)} id='run-aec052ac-f501-4e92-b4c4-1ea89e950160-0' tool_calls=[{'name': 'pokeapi', 'args': {'name': 'pikachu'}, 'id': '2ab15d1e-2487-40d8-91b7-b61f24f74b44', 'type': 'tool_call'}] usage_metadata={'input_tokens': 894, 'output_tokens': 24, 'total_tokens': 918}
[{'name': 'pokeapi', 'args': {'name': 'pikachu'}, 'id': '2ab15d1e-2487-40d8-91b7-b61f24f74b44', 'type': 'tool_call'}]


### Custom tools results

Both ChatOpenAI and ChatOllama can use custom tools created through the tool registry