In [20]:
from mirascope import llm, prompt_template
from typing import Literal
from pydantic import BaseModel, Field
import os 
from dotenv import load_dotenv
from datetime import datetime

# Load variables from a local .env file (if present) into the environment before any
# tool runs; web_search later reads BRAVE_API_KEY through os.getenv.
load_dotenv()
    

: 

In [21]:
def web_search(query: str) -> str:
    """
    Searches the web and returns the summaries of top results.
    
    Args:
        query: The search query to be executed.
        
    Returns:
        A string containing the summaries of the top results.
    """
    # NOTE(review): this function is handed to the LLM as a tool (see `tools=[...]`
    # on `search`); the docstring above is presumably used as the tool description,
    # so it is kept verbatim — confirm before rewording it.
    try:
        # Imported inside the try so a missing `brave` package is reported through
        # the same error string as any other search failure.
        from brave import Brave
        brave = Brave(api_key=os.getenv("BRAVE_API_KEY"))
        results = brave.search(q=query, count=10, raw=True)
        # Tolerate a missing or null "web" / "results" section.
        web_results = (results.get("web") or {}).get("results") or []

        summaries = []
        for result in web_results:
            profile = result.get('profile')
            url = result.get('url')
            # Entries without a profile are skipped (as before); entries without a
            # URL are skipped too because the agent could not fetch them anyway.
            if not profile or not url:
                continue
            # Build each summary from whichever fields are present so that one
            # incomplete entry no longer aborts the whole search with a KeyError.
            header = " - ".join(
                part for part in (profile.get('name'), profile.get('long_name')) if part
            )
            fields = (header, result.get('title'), result.get('description'), url)
            summaries.append("\n".join(field for field in fields if field))
        return "\n\n".join(summaries)
    except Exception as e:
        # Errors are returned as text so the calling agent can read them as tool output.
        return f"Error searching the web: {e}"

In [22]:
# Smoke-test the search tool with a sample query and print the formatted summaries.
sample_query = "what is the latest research on RL on tool calling?"
result = web_search(sample_query)
print(result)

GitHub - github.com
GitHub - Agent-RL/ReCall: ReCall: Learning to Reason with Tool Call for LLMs via Reinforcement Learning
We introduce <strong>ReCall</strong>, a novel framework that trains LLMs to Reason with Tool Call via reinforcement learning—without requiring any supervised data on tool use trajectories or reasoning steps. <strong>ReCall</strong> empowers LLMs to agentically use and combine arbitrary tools like OpenAI o3, offering an accessible ...
https://github.com/Agent-RL/ReCall

Freecodecamp - freecodecamp.org
The Agentic AI Handbook: A Beginner's Guide to Autonomous Intelligent Agents
You may have heard about “Agentic AI” systems and wondered what they’re all about. Well, in basic terms, the idea behind Agentic AI is that it can see its surroundings, set and pursue goals, plan and reason through many processes, and learn from expe...
https://www.freecodecamp.org/news/the-agentic-ai-handbook/

Sebastianraschka - sebastianraschka.com
The State of Reinforcement Learning for L

In [23]:
def extract_content(url: str) -> str:
    """
    Fetches the content of a given URL and returns it as a markdown page.
    
    Args:
        url: The URL to fetch the content from.
        
    Returns:
        A string containing the content of the URL as a markdown page.
    """
    # NOTE(review): this function is handed to the LLM as a tool (see `tools=[...]`
    # on `search`); the docstring above is presumably used as the tool description,
    # so it is kept verbatim — confirm before rewording it.
    import requests
    import markdownify

    # Without a timeout, requests waits indefinitely on an unresponsive host and
    # would stall the whole agent loop. Kept as a local constant (not a parameter)
    # so the tool's signature stays unchanged.
    request_timeout_seconds = 30
    response = requests.get(url, timeout=request_timeout_seconds)
    content = response.text
    # Convert the fetched HTML text to markdown.
    markdown = markdownify.markdownify(content)
    return markdown

# Smoke-test the fetch tool against an arXiv HTML page and print the resulting markdown.
paper_url = "https://arxiv.org/html/2504.13958v1"
result = extract_content(paper_url)
print(result)

ToolRL: Reward is All Tool Learning Needs





1. [1 Introduction](https://arxiv.org/html/2504.13958v1#S1 "In ToolRL: Reward is All Tool Learning Needs")
2. [2 Related Work](https://arxiv.org/html/2504.13958v1#S2 "In ToolRL: Reward is All Tool Learning Needs")
   1. [Tool-Integrated Reasoning of LLMs.](https://arxiv.org/html/2504.13958v1#S2.SS0.SSS0.Px1 "In 2 Related Work ‣ ToolRL: Reward is All Tool Learning Needs")
   2. [Exploration of RL in LLMs.](https://arxiv.org/html/2504.13958v1#S2.SS0.SSS0.Px2 "In 2 Related Work ‣ ToolRL: Reward is All Tool Learning Needs")
3. [3 Method](https://arxiv.org/html/2504.13958v1#S3 "In ToolRL: Reward is All Tool Learning Needs")
   1. [3.1 Task Definition](https://arxiv.org/html/2504.13958v1#S3.SS1 "In 3 Method ‣ ToolRL: Reward is All Tool Learning Needs")
   2. [3.2 TIR Rollout](https://arxiv.org/html/2504.13958v1#S3.SS2 "In 3 Method ‣ ToolRL: Reward is All Tool Learning Needs")
   3. [3.3 Reward Design](https://arxiv.org/html/2504.13958v1#S3.SS3 "

In [24]:
# One LLM turn of the web-search agent.
# `llm.call` targets the Gemini model and exposes `web_search` / `extract_content`
# as tools; `prompt_template` carries the system prompt, the prior messages and the
# user's question. The function body only supplies the template's computed fields:
# a fresh timestamp for {current_date} and the caller's message history for {history}
# (an empty list when none is given).
@llm.call(
    provider='google',
    model='gemini-2.0-flash',
    tools=[web_search, extract_content],
)
@prompt_template(
        """
        SYSTEM:
        You are an expert web searcher. Your task is to answer the user's question using the provided tools.
        Use the current date provided to search the web for the most up to date information.
        The current date is {current_date}.

        You have access to the following tools:
        - `web_search(query: str)`: Searches the web and returns summaries of top results.
        - `extract_content(url: str)`: Parse the content of a webpage of a given URL and returns it as a markdown page.

        When calling the `web_search` tool, the `body` is simply the summary of the search
        result with the URL. You MUST then call the `extract_content` tool to get the actual content
        of the webpage. It is up to you to determine which search results to parse.
        
        You may call one tool per turn, for up to 10 turns before giving your final answer.
        
        In each turn you should give your thinking process and the final answer when you have gathered all of the information you need.

        Once you have gathered all of the information you need, generate a writeup that
        strikes the right balance between brevity and completeness based on the context of the user's query.

        MESSAGES: {history}
        USER: {question}
        """
)
def search(question: str, history: list = None):
    # Timestamp is taken at call time so every turn sees the current date.
    now_stamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    # A missing (or empty) history is rendered as an empty message list.
    prior_messages = history if history else []
    return {
        "computed_fields": {
            "current_date": now_stamp,
            "history": prior_messages,
        }
    }
    


In [8]:
# Single-turn call: shows the raw response object, including the tool call the
# model requests, before any tool is actually executed.
sample_question = "Why did OKC Thunder lose game 1 of the NBA finals?"
result = search(sample_question)

# Bare last expression so the notebook renders the response object.
result

GoogleCallResponse(metadata={}, response=GenerateContentResponse(candidates=[Candidate(content=Content(parts=[Part(video_metadata=None, thought=None, code_execution_result=None, executable_code=None, file_data=None, function_call=None, function_response=None, inline_data=None, text='Okay, I need to find out why the OKC Thunder lost game 1 of the NBA finals. Since the current date is 2025-06-08, I will search for the results of the 2025 NBA finals game 1.\n'), Part(video_metadata=None, thought=None, code_execution_result=None, executable_code=None, file_data=None, function_call=FunctionCall(id=None, args={'query': 'OKC Thunder game 1 2025 NBA finals result'}, name='web_search'), function_response=None, inline_data=None, text=None)], role='model'), citation_metadata=None, finish_message=None, token_count=None, avg_logprobs=-0.07837329412761487, finish_reason=<FinishReason.STOP: 'STOP'>, grounding_metadata=None, index=None, logprobs_result=None, safety_ratings=None)], model_version='gemin

In [16]:
def run_agent_with_tools(question: str, max_iterations: int = 10):
    """
    Run the agent with iterative tool calling until completion.

    Each iteration calls `search` with the question and the accumulated
    conversation history, executes every tool the model requested, appends the
    tool outputs to the history and repeats. The loop stops when the model
    responds without requesting a tool, or when `max_iterations` is exhausted.

    Args:
        question: The user's question
        max_iterations: Maximum number of LLM calls to prevent infinite loops

    Returns:
        Dict with the keys:
            final_response: content of the last LLM response (None if no call was made)
            iterations: number of LLM calls performed
            total_cost: sum of the responses' `cost` (missing values count as 0)
            total_tokens: sum of input and output tokens (missing values count as 0)
            conversation_history: the accumulated list of messages
            completed: True only when the model finished without requesting a tool
    """
    conversation_history = []
    total_cost = 0
    total_tokens = 0
    iteration = 0
    # Tracked explicitly: comparing `iteration` with `max_iterations` cannot tell
    # "finished on the last allowed turn" apart from "ran out of turns".
    completed = False
    # Stays None when max_iterations <= 0 and the loop body never runs.
    result = None

    print(f"🤖 Starting agent for question: {question}")
    print("=" * 60)

    while iteration < max_iterations:
        iteration += 1
        print(f"\n📍 Iteration {iteration}")

        # Make the LLM call with conversation history.
        # NOTE(review): the question is passed on every turn while the first turn's
        # user message is also stored in the history, so from iteration 2 on the
        # prompt presumably contains the question twice — confirm this is intended.
        result = search(question, history=conversation_history)

        # Track spend. Cost and token counts are treated as 0 when the response
        # reports None for them, instead of failing on `None + int`.
        total_cost += result.cost or 0
        total_tokens += (result.input_tokens or 0) + (result.output_tokens or 0)

        print(f"💭 LLM Response: {result.content}")

        # The user message is added once; the history is the agent's state.
        if iteration == 1 and result.user_message_param:
            conversation_history.append(result.user_message_param)

        # Add assistant message to history
        conversation_history.append(result.message_param)

        if not result.tools:
            # No tools called - agent is done
            print("✅ No tools called - Agent completed!")
            completed = True
            break

        print(f"🔧 Tools called: {len(result.tools)}")
        tools_and_outputs = []

        for i, tool in enumerate(result.tools, start=1):
            print(f"   Tool {i}: {tool._name()}({tool.args})")

            # Execute the tool; a failure is fed back to the model as text so it
            # can react to it on the next turn.
            try:
                output = tool.call()
            except Exception as e:
                print(f"   ❌ Tool error: {e}")
                output = f"Error: {e}"
            else:
                print(f"   ✅ Tool output length: {len(str(output))} characters")
            tools_and_outputs.append((tool, output))

        # Add tool results to conversation history, then loop for another LLM call.
        if tools_and_outputs:
            conversation_history.extend(
                result.tool_message_params(tools_and_outputs)
            )

    if not completed:
        print(f"⚠️  Reached maximum iterations ({max_iterations})")

    print("\n" + "=" * 60)
    print("📊 Final Stats:")
    print(f"   Iterations: {iteration}")
    print(f"   Total Cost: ${total_cost:.6f}")
    print(f"   Total Tokens: {total_tokens}")
    print(f"   Conversation History Length: {len(conversation_history)}")

    return {
        'final_response': result.content if result is not None else None,
        'iterations': iteration,
        'total_cost': total_cost,
        'total_tokens': total_tokens,
        'conversation_history': conversation_history,
        'completed': completed
    }


In [17]:
# Run the full multi-turn agent loop on a sample question, then show only its final answer.
result = run_agent_with_tools("Why did OKC Thunder lose game 1 of the NBA finals?")

print("\n🎯 Final Answer:", result['final_response'], sep="\n")


🤖 Starting agent for question: Why did OKC Thunder lose game 1 of the NBA finals?

📍 Iteration 1
💭 LLM Response: The NBA finals are in June. The current date is June 8, 2025. Therefore, I will search for the results of the 2025 NBA finals.

🔧 Tools called: 1
   Tool 1: web_search({'query': 'OKC Thunder NBA Finals 2025 game 1 result'})
   ✅ Tool output length: 3868 characters

📍 Iteration 2
💭 LLM Response: 
🔧 Tools called: 1
   Tool 1: extract_content({'url': 'https://www.espn.com/nba/story/_/id/44610574/nba-finals-2025-postseason-news-scores-highlights'})
   ✅ Tool output length: 630 characters

📍 Iteration 3
💭 LLM Response: 
🔧 Tools called: 1
   Tool 1: web_search({'query': 'OKC Thunder vs Pacers Game 1 NBA Finals 2025 recap'})
   ✅ Tool output length: 4562 characters

📍 Iteration 4
💭 LLM Response: The OKC Thunder lost Game 1 of the 2025 NBA Finals to the Indiana Pacers with a score of 111-110. A late comeback by the Pacers, capped by a pull-up jumper from Tyrese Haliburton with 0.3 s