In [1]:
from dotenv import load_dotenv
import os

load_dotenv()

GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")
BASE_URL = os.getenv("BASE_URL")

In [2]:
from typing import Dict, List, Callable, Optional, Any
from datetime import datetime


class Tool:
    def __init__(
        self,
        name: str,
        description: str,
        input_schema: Dict[str, Any],
        output_schema: Dict[str, Any],
        func: Callable[..., Any],
    ):
        self.name = name
        self.description = description
        self.input_schema = input_schema
        self.output_schema = output_schema
        self.func = func

    def __call__(self, **kwargs):
        return self.func(**kwargs)

In [3]:
from typing import Union, Literal
from pydantic import BaseModel


class ToolRegistry:
    def __init__(self):
        self.tools: Dict[str, Tool] = {}

    def register(self, tool: Tool):
        self.tools[tool.name] = tool

    def get(self, name: str) -> Tool:
        if name not in self.tools.keys():
            raise ValueError(f"Tool '{name}' not found")
        return self.tools[name]

    def list_tools(self) -> List[Dict[str, Any]]:
        return [
            {
                "name": tool.name,
                "description": tool.description,
                "input_schema": tool.input_schema.model_json_schema(),
            }
            for tool in self.tools.values()
        ]

    def get_tool_call_args_type(self) -> Union[BaseModel]:
        input_args_models = [tool.input_schema for tool in self.tools.values()]
        tool_call_args = Union[tuple(input_args_models)]
        return tool_call_args

    def get_tool_names(self) -> Literal[None]:
        return Literal[*self.tools.keys()]

In [4]:
# Tool definitions

import requests

def add(a: int, b: int) -> int:
    return a + b


def multiply(a: int, b: int) -> int:
    return a * b


def get_weather(city: str) -> str:
    # API call here
    return f"Weather in {city}: Sunny, 72Â°F"


def list_all_gatepasses(status: Optional[str] = None) -> List[Dict[str, Any]]:
    url = f"{BASE_URL}/hr/gatepass/list"

    params = {"status": status} if status else {}

    response = requests.get(url, params=params)
    response.raise_for_status()  # raises exception for 4xx/5xx

    return response.json()

In [5]:
# Tool input and output schemas for validation

class ToolAddArgs(BaseModel):
    a: int
    b: int


class ToolMultiplyArgs(BaseModel):
    a: int
    b: int

class WeatherArgs(BaseModel):
    city: str
    
    
class GatePassListArgs(BaseModel):
    status: Optional[str] = None
    
class GatePassOut(BaseModel):
    id: str
    number: str
    person_name: str
    description: str
    created_by: str
    is_returnable: bool
    status: str
    status_history: List[str]
    created_at: datetime
    approved_at: Optional[datetime]
    exit_photo_id: Optional[str]
    return_photo_id: Optional[str]
    exit_time: Optional[datetime]
    return_time: Optional[datetime]
    qr_code_url: Optional[str]

In [6]:
# Tool registry, create a new tool and add it to this registry then the agent will be able to call it

registry = ToolRegistry()

registry.register(Tool(
    name="add",
    description="Add two numbers",
    input_schema=ToolAddArgs,
    output_schema={"result": "int"},
    func=add,
))

registry.register(Tool(
    name="multiply",
    description="Multiply two numbers",
    input_schema=ToolMultiplyArgs,
    output_schema={"result": "int"},
    func=multiply,
))

registry.register(Tool(
    name="get_weather",
    description="Get current weather for a city",
    input_schema=WeatherArgs,
    output_schema={"Weather": "str"},
    func=get_weather,
))

registry.register(
    Tool(
        name="list_gatepasses",
        description="List all gatepasses, optionally filtered by status",
        input_schema=GatePassListArgs,
        output_schema={"gatepasses": "List[GatePassOut]"},
        func=list_all_gatepasses,
    )
)

In [7]:
# Get type-safe tool names and arguments
ToolNameLiteral = registry.get_tool_names()
ToolArgsUnion = registry.get_tool_call_args_type()

print(f"Available Tools: {ToolNameLiteral}\nTool Arguments: {ToolArgsUnion}")

class ToolCall(BaseModel):
    action: Literal["tool"]
    thought: str
    tool_name: ToolNameLiteral
    args: ToolArgsUnion


class FinalAnswer(BaseModel):
    action: Literal["final"]
    answer: str


LLMResponse = Union[ToolCall, FinalAnswer]

Available Tools: typing.Literal['add', 'multiply', 'get_weather', 'list_gatepasses']
Tool Arguments: typing.Union[__main__.ToolAddArgs, __main__.ToolMultiplyArgs, __main__.WeatherArgs, __main__.GatePassListArgs]


In [8]:
import json
from google import genai
from google.genai import types


class GeminiLLM:
    def __init__(self, client, tool_registry, model="gemini-2.5-flash"):
        self.client = client
        self.model = model
        self.tool_registry = tool_registry
        self.system_instruction = self._create_system_instruction()
        
    def _create_system_instruction(self):
        tools_info = self.tool_registry.list_tools()
        print(tools_info)
        
        system_prompt = f"""
        You are a conversational AI agent that can interact with external tools.
        
        CRITICAL RULES (MUST FOLLOW):
        - You are NOT allowed to perform operations internally that could be performed by an available tool.
        - If a tool exists that can perform any part of the task, you MUST use that tool.
        - You MUST NOT skip tools, even for simple or obvious steps.
        - You MUST NOT combine multiple operations into a single step unless a tool explicitly supports it.
        - You may ONLY produce a final answer when no available tool can further advance the task.
        TOOL USAGE RULES:
        - Each tool call must perform exactly ONE meaningful operation.
        - If the task requires multiple operations, you MUST call tools sequentially.
        - If multiple tools could apply, choose the most specific one.
        RESPONSE FORMAT (STRICT):
        - You MUST respond ONLY in valid JSON.
        - Never include explanations outside JSON.
        - You must choose exactly one action per response.
        
        Available tools:
        {json.dumps(tools_info, indent=2)}
        
        To use a tool, respond with a JSON object with the following structure:
        {{
            "action": "tool",
            "thought": "Your reasoning here",
            "tool_name": "name of the tool to call",
            "inputs": "tool inputs here"
        }}
        
        To give a final answer, respond with a JSON object with the following structure:
        {{
            "action": "final",
            "answer": "your final answer here"
        }}
        """
        return system_prompt
    
    
    # converting the messages to gemini native request body
    # each llm will need to have its own native messages formatter
    
    def _format_gemini_chat_history(self, history: list[dict]) -> list:
        formatted_history = []
        for message in history:
            if message["role"] == "user":
                formatted_history.append(types.Content(
                        role="user",
                        parts=[
                            types.Part.from_text(text=message["content"])
                        ]
                    )
                )
            if message["role"] == "assistant":
                formatted_history.append(types.Content(
                        role="model",
                        parts=[
                            types.Part.from_text(text=message["content"])
                        ]
                    )
                )
            if message["role"] == "tool":
                formatted_history.append(types.Content(
                        role="tool",
                        parts=[
                            types.Part.from_function_response(
                                name=message["tool_name"],
                                response={'result': message["tool_response"]},
                            )
                        ]
                    )
                )
        return formatted_history
    
    # finally a generate function with structured output based on the schema defined above
    def generate(self, history: list[dict]) -> str:
        gemini_history_format = self._format_gemini_chat_history(history)
        response = self.client.models.generate_content(
            model=self.model,
            contents=gemini_history_format,
            config=types.GenerateContentConfig(
                temperature=0,
                response_mime_type="application/json",
                response_schema=LLMResponse,
                system_instruction=self.system_instruction,
                automatic_function_calling=types.AutomaticFunctionCallingConfig(disable=True)
            ),
        )
        return response.text

In [9]:
# Main agent orchestrator

class Agent:
    def __init__(self, llm, tool_registry, max_steps=5):
        self.llm = llm
        self.tool_registry = tool_registry
        self.history = []
        self.max_steps = max_steps # prevent infinite loops
        
    def run(self, user_input: str):
        self.history.append({"role": "user", "content": user_input})
        for step in range(self.max_steps):
            # Get LLM decision
            llm_output = self.llm.generate(self.history)
            action = json.loads(llm_output)
            if action["action"] == "tool":
                # Record the thought process
                self.history.append(
                    {"role": "assistant", "content": llm_output}
                )
                # Execute the tool
                tool = self.tool_registry.get(action["tool_name"])
                result = tool(**action["args"])
                # Record the result
                observation = f"Tool {tool.name} returned: {result}"
                print(observation)
                self.history.append(
                    {"role": "tool", "tool_name": tool.name, "tool_response": result}
                )
                continue
            if action["action"] == "final":
                self.history.append(
                    {"role": "assistant", "content": llm_output}
                )
                return action["answer"]
        raise RuntimeError("Agent did not terminate within max_steps")
        
            

In [10]:
from google import genai
from pprint import pprint
import os


if not GEMINI_API_KEY:
    raise RuntimeError("GEMINI_API_KEY environment variable not set")

# Initialize the client
client = genai.Client(api_key=GEMINI_API_KEY)
# Create LLM and Agent
llm = GeminiLLM(client, registry)
agent = Agent(llm, registry)


def chat_with_agent(agent: Agent):
    print("Welcome! Type 'exit' to quit.\\n")
    while True:
        user_input = input("You: ")
        if user_input.lower() in ["exit", "quit", "q"]:
            print("Goodbye!")
            pprint(agent.history)
            break
        try:
            response = agent.run(user_input)
            print(f"Agent: {response}[:100]")
        except RuntimeError as e:
            print(f"Agent error: {e}")
        except Exception as e:
            print(f"Unexpected error: {e}")


# Start chatting
chat_with_agent(agent)

[{'name': 'add', 'description': 'Add two numbers', 'input_schema': {'properties': {'a': {'title': 'A', 'type': 'integer'}, 'b': {'title': 'B', 'type': 'integer'}}, 'required': ['a', 'b'], 'title': 'ToolAddArgs', 'type': 'object'}}, {'name': 'multiply', 'description': 'Multiply two numbers', 'input_schema': {'properties': {'a': {'title': 'A', 'type': 'integer'}, 'b': {'title': 'B', 'type': 'integer'}}, 'required': ['a', 'b'], 'title': 'ToolMultiplyArgs', 'type': 'object'}}, {'name': 'get_weather', 'description': 'Get current weather for a city', 'input_schema': {'properties': {'city': {'title': 'City', 'type': 'string'}}, 'required': ['city'], 'title': 'WeatherArgs', 'type': 'object'}}, {'name': 'list_gatepasses', 'description': 'List all gatepasses, optionally filtered by status', 'input_schema': {'properties': {'status': {'anyOf': [{'type': 'string'}, {'type': 'null'}], 'default': None, 'title': 'Status'}}, 'title': 'GatePassListArgs', 'type': 'object'}}]
Welcome! Type 'exit' to quit.

In [14]:
# Advanced version of previous agent - added human in the loop, memory, obersvability

class MemoryStore:
    def __init__(self, file_path: str, max_entries: int = 50):
        self.file_path = file_path
        self.max_entries = max_entries
        self._ensure_file()

    def _ensure_file(self):
        if not os.path.exists(self.file_path):
            with open(self.file_path, "w") as f:
                json.dump([], f)

    def load_all(self) -> List[dict]:
        try:
            with open(self.file_path, "r") as f:
                return json.load(f)
        except Exception:
            return []

    def append(self, entry: dict):
        data = self.load_all()
        data.append(entry)

        with open(self.file_path, "w") as f:
            json.dump(data, f, indent=2)

    def get_recent(self, limit: Optional[int] = None) -> list[dict]:
        data = self.load_all()
        limit = limit or self.max_entries
        return data[-limit:]

    def delete_all(self):
        with open(self.file_path, "w") as f:
            json.dump([], f)
            
# creating memory object
memory_store = MemoryStore(
        file_path="/Users/apple/Desktop/llms stuff/universal_agent/content/agent_memory.json",
        max_entries=10,
    )