# functions

In [1]:
import uuid
from langchain_core.output_parsers import JsonOutputParser

class JsonOrRawParser(JsonOutputParser):
    def invoke(self, input, config=None):
        try:
            input.content = input.content.replace("tool_call", "")
            tool_calls = super().invoke(input)
            tool_calls = [tool_call | {"id": str(uuid.uuid4()), "type": "tool_call"} for tool_call in tool_calls]
            input.tool_calls = tool_calls
            input.content = ""
            
            return input
            
        except Exception as e:
            print(e)
            return input

In [None]:
# system_template = """\
# You have access to tools. If you decide to invoke any of the tools(s),
# you MUST put it in the format of
# [
#   {{{{
#     "name": "tool_name",
#     "arguments": {{{{ "arg1": "value1", "arg2": "value2" }}}}
#   }}}},
#   ...
# ]
# You SHOULD NOT include any other text in the response if you call a function.
# Available functions:
# [{rendered_tools}]
# """

In [34]:
import json
from langchain_ollama import ChatOllama
from langchain_core.prompts import ChatPromptTemplate, SystemMessagePromptTemplate
from langchain_core.tools import render_text_description


class ChatOllamaCustomized(ChatOllama):
    def bind_tools(self, tools):

        system_template = """\
        You are a helpful assistant with access to the following powerful tools:

        [{rendered_tools}]

        **Crucial Instruction:**
        If a user's request can be answered by using any of the available tools, you MUST use the tool(s) to retrieve the information.
        Only if no tool is suitable, you may respond directly.

        When using a tool, you MUST respond ONLY with the tool invocation(s) in the exact JSON format below. Do NOT include any other text, explanation, or commentary outside this JSON structure:

        [
          {{{{
            "name": "tool_name",
            "args": {{{{
              "arg1": "value1",
              "arg2": "value2"
            }}}}
          }}}},
          ...
        ]

        If no tool is appropriate for the user's query, you may respond normally with a direct answer.
        """
        
        rendered_tools = json.dumps([tool.args_schema.model_json_schema() for tool in tools], indent=2)
        rendered_tools = rendered_tools.replace("{", "{{").replace("}", "}}")
        
        system_prompt_template = SystemMessagePromptTemplate.from_template(system_template)
        system_prompt = system_prompt_template.format_messages(rendered_tools=rendered_tools)
        
        prompt = ChatPromptTemplate.from_messages(
            [("system", system_prompt[0].content), ("user", "{input}")]
        )

        chain = prompt | self | JsonOrRawParser()

        return chain.bind()

# setup

In [35]:
base_url="http://host.docker.internal:11434"
model_name = "gemma3:27b"

llm = ChatOllama(
    model=model_name,
    base_url=base_url,
    temperature=0.7,
)

# tools

In [36]:
from langchain_core.tools import tool


@tool
def multiply(x: float, y: float) -> float:
    """Multiply two numbers together."""
    return x * y


@tool
def add(x: int, y: int) -> int:
    "Add two numbers."
    return x + y

@tool
def retrieve(input_query: str):
    """
    Accesses an external knowledge base to retrieve relevant and useful documents based on the user's query.
    Use this function whenever the query requires detailed factual information, specific reference material,
    or external background context that might not be part of my general training data.
    This function is particularly useful for questions seeking explanations, definitions, or examples
    that benefit from a comprehensive document search.
    """
    retrieved_docs = vector_store.similarity_search(query, k=2)
    serialized = "\n\n".join(
        (f"Source: {doc.metadata}\n" f"Content: {doc.page_content}")
        for doc in retrieved_docs
    )
    return serialized

tools = [retrieve]

tools = [retrieve, multiply, add]


# usage

In [37]:
llm = ChatOllamaCustomized(
    model=model_name,
    base_url=base_url,
)

In [38]:
llm_with_tools = llm.bind_tools(tools)

In [39]:
llm_with_tools.invoke("12+7 and 15*8")

AIMessage(content='', additional_kwargs={}, response_metadata={'model': 'gemma3:27b', 'created_at': '2025-05-24T11:14:19.343095168Z', 'done': True, 'done_reason': 'stop', 'total_duration': 6052999342, 'load_duration': 3130651443, 'prompt_eval_count': 594, 'prompt_eval_duration': 567274183, 'eval_count': 87, 'eval_duration': 2310784576, 'model_name': 'gemma3:27b'}, id='run--7b93889b-3fa1-4065-bb05-8c42c5a5f448-0', tool_calls=[{'name': 'add', 'args': {'x': 12, 'y': 7}, 'id': 'c7052ff9-5649-4a70-b14c-bf7ee7ff2204', 'type': 'tool_call'}, {'name': 'multiply', 'args': {'x': 15, 'y': 8}, 'id': '57348c0c-5298-487e-b9f8-3f4933f7133c', 'type': 'tool_call'}], usage_metadata={'input_tokens': 594, 'output_tokens': 87, 'total_tokens': 681})

In [15]:
llm_with_tools.invoke("when the first man landed on the moon")

AIMessage(content='', additional_kwargs={}, response_metadata={'model': 'gemma3:27b', 'created_at': '2025-05-24T08:57:38.214447774Z', 'done': True, 'done_reason': 'stop', 'total_duration': 1360452524, 'load_duration': 32244382, 'prompt_eval_count': 582, 'prompt_eval_duration': 44999883, 'eval_count': 49, 'eval_duration': 1281181997, 'model_name': 'gemma3:27b'}, id='run--7fc3017b-b1b8-4b9c-a4be-7bf22cb60c02-0', tool_calls=[{'name': 'retrieve', 'arguments': {'input_query': 'when the first man landed on the moon'}, 'id': '13a19ad1-2063-4c4f-bad5-896eb33a6887', 'type': 'tool_call'}], usage_metadata={'input_tokens': 582, 'output_tokens': 49, 'total_tokens': 631})

In [16]:
llm_with_tools.invoke("What are the different types of task decomposition techniques, and how are they used in AI agents?")

AIMessage(content='', additional_kwargs={}, response_metadata={'model': 'gemma3:27b', 'created_at': '2025-05-24T08:57:39.744597884Z', 'done': True, 'done_reason': 'stop', 'total_duration': 1307607485, 'load_duration': 30020848, 'prompt_eval_count': 593, 'prompt_eval_duration': 44223384, 'eval_count': 47, 'eval_duration': 1231470291, 'model_name': 'gemma3:27b'}, id='run--ee7f6e9c-2d66-4d3f-aa22-cc3ffb7c3e6e-0', tool_calls=[{'name': 'retrieve', 'arguments': {'input_query': 'task decomposition techniques in AI agents'}, 'id': 'd8766db4-0227-41ea-b6e3-c1f7bdf68731', 'type': 'tool_call'}], usage_metadata={'input_tokens': 593, 'output_tokens': 47, 'total_tokens': 640})

In [17]:
llm_with_tools.invoke("what is task decomposition")

AIMessage(content='', additional_kwargs={}, response_metadata={'model': 'gemma3:27b', 'created_at': '2025-05-24T08:57:41.16583997Z', 'done': True, 'done_reason': 'stop', 'total_duration': 1196044471, 'load_duration': 30180782, 'prompt_eval_count': 578, 'prompt_eval_duration': 43297192, 'eval_count': 43, 'eval_duration': 1120802804, 'model_name': 'gemma3:27b'}, id='run--eacc2869-fa7c-4587-adc1-ac43753d01d2-0', tool_calls=[{'name': 'retrieve', 'arguments': {'input_query': 'task decomposition'}, 'id': 'f3fcf5e6-e9c7-493d-8a6b-6c30be99b5ea', 'type': 'tool_call'}], usage_metadata={'input_tokens': 578, 'output_tokens': 43, 'total_tokens': 621})

# test