<a href="https://colab.research.google.com/drive/1PzWDDIItkrKbo2f8Reb20MqpafCR4VEc?usp=sharing" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"></a>

### Toolformer Agent Implementation

A self-teaching agent that learns to use tools by repeating five steps:
1. Generate potential API calls
2. Execute the actual API calls
3. Observe the real results
4. Learn which tools improve predictions
5. Self-fine-tune on successful tool usage (approximated here by reusing successful calls as examples in later prompts)

In [1]:
!pip install -qU google-generativeai

In [2]:
import ast
import getpass
import json
import operator
import re
from datetime import datetime

import google.generativeai as genai

Get a Google Gemini API key here: https://aistudio.google.com/app/apikey

In [3]:
# Prompt for the key with getpass so the typed value is not echoed back.
API_KEY = getpass.getpass("Enter your Google API key: ")

Enter your Google AI API key: ··········


In [8]:
# Hand the key entered above to the google-generativeai client.
# NOTE(review): models created afterwards are presumably authenticated with
# this key — confirm against the SDK documentation.
genai.configure(api_key=API_KEY)

In [9]:
class ToolformerAgent:
    """Agent that tries registered tools on a query and remembers which helped.

    For every query the agent (1) produces a baseline answer without tools,
    (2) asks the model to propose tool calls, (3) executes them, (4) asks the
    model to score each tool-assisted answer against the baseline, and
    (5) updates per-tool statistics plus a list of successful patterns that
    are fed back into later prompts as examples.

    Attributes:
        model: Generative model client used for every prompt.
        tools: Maps tool name -> dict with the keys ``func``, ``desc``,
            ``success_count``, ``fail_count`` and ``usefulness_score``.
        tool_usage_history: One record per execution that was learned from.
        learned_patterns: Records of executions that were judged useful.
    """

    # Minimum 0-10 rating at which a tool counts as having improved the answer.
    _IMPROVED_THRESHOLD = 7
    # Rating assumed when the evaluator's reply contains no usable number.
    _DEFAULT_SCORE = 5.0

    # "<n>/10" style rating, e.g. "8/10" or "7.5 / 10".
    _SCORE_OUT_OF_TEN = re.compile(r"(\d+(?:\.\d+)?)\s*/\s*10\b")
    # Labelled rating on a single line, e.g. "Rate usefulness (0-10): 8".
    _SCORE_LABELLED = re.compile(
        r"(?:usefulness|score|rating)[^\n:]*:[ \t*]*(\d+(?:\.\d+)?)",
        re.IGNORECASE,
    )
    # Leading list marker such as "1. " or "2) " (the prompt has a numbered list).
    _LIST_MARKER = re.compile(r"^\s*\d+[.)]\s+")
    # Echo of the rating scale itself, e.g. "(0-10)" or "0 to 10".
    _SCALE_ECHO = re.compile(r"\(?\b0\s*(?:-|–|to)\s*10\b\)?")
    # Any non-negative integer or decimal number.
    _NUMBER = re.compile(r"\d+(?:\.\d+)?")

    def __init__(self, model_name="gemini-2.0-flash"):
        """Create an agent with no tools and no learned knowledge.

        Args:
            model_name: Name passed to ``genai.GenerativeModel``. Defaults to
                the model the notebook was written against.
        """
        self.model = genai.GenerativeModel(model_name)
        self.tools = {}
        self.tool_usage_history = []  # Track which tools work well
        self.learned_patterns = []     # Successful tool usage patterns

    def add_tool(self, name, func, description):
        """Register a tool with the agent.

        Registering an existing name replaces the tool and resets its stats.

        Args:
            name: Identifier the model must use to request the tool.
            func: Callable invoked with the generated params as keyword args.
            description: Human-readable summary shown to the model.
        """
        self.tools[name] = {
            "func": func,
            "desc": description,
            "success_count": 0,
            "fail_count": 0,
            "usefulness_score": 0.5  # Start neutral
        }

    def _generate_text(self, prompt):
        """Send ``prompt`` to the model and return the reply text.

        NOTE(review): ``.text`` may raise when the response carries no text
        part (e.g. a blocked response) — confirm against the SDK docs.
        """
        return self.model.generate_content(prompt).text

    def generate_potential_calls(self, query):
        """Ask the model for candidate tool calls for ``query``.

        The prompt lists every registered tool with its current success rate
        and the five most recent successful patterns.

        Returns:
            A list of dicts as produced by ``_parse_potential_calls``.
        """
        tools_desc = "\n".join([
            f"- {name}: {info['desc']} (success rate: {self._get_success_rate(name)})"
            for name, info in self.tools.items()
        ])

        learned_examples = "\n".join([
            f"Example: '{p['query']}' -> Use {p['tool']}({p['params']}) -> Success"
            for p in self.learned_patterns[-5:]  # Last 5 successful patterns
        ]) if self.learned_patterns else "No learned patterns yet."

        prompt = f"""You are Toolformer, an agent that learns to use tools.

        Query: {query}

        Available Tools:
        {tools_desc}

        Learned Successful Patterns:
        {learned_examples}

        Generate 2-3 potential tool calls that might help answer this query.
        For each, explain why it might be useful.

        Format:
        Tool: tool_name
        Params: {{"param": "value"}}
        Expected Help: How this tool would improve the answer
        ---

        Response:"""

        response = self._generate_text(prompt)
        return self._parse_potential_calls(response)

    @staticmethod
    def _parse_params(params_str):
        """Parse the text after ``Params:`` into a dict without executing it.

        The prompt asks for JSON, so JSON is tried first (this also accepts
        ``true``/``false``/``null``). Python-literal dicts such as
        ``{'a': 1}`` are accepted through ``ast.literal_eval``. The text comes
        from the model and is never passed to ``eval``.

        Returns:
            The parsed dict, or ``{}`` when the text is not a dict literal.
        """
        for parser in (json.loads, ast.literal_eval):
            try:
                parsed = parser(params_str)
            except (ValueError, SyntaxError, TypeError, MemoryError, RecursionError):
                continue
            if isinstance(parsed, dict):
                return parsed
        return {}

    @staticmethod
    def _field_value(line, label):
        """Return the text following ``label`` with markdown emphasis trimmed."""
        # Slice by label length instead of splitting on the label, so a value
        # that itself contains the label text is kept whole.
        return line[len(label):].strip().lstrip("*").strip()

    def _parse_potential_calls(self, response):
        """Parse generated tool calls from the LLM response.

        Recognises ``Tool:``, ``Params:`` and ``Expected Help:`` lines, with
        ``---`` (or the next ``Tool:`` line) closing the current call. Leading
        markdown bullets/emphasis (``**Tool:**``, ``- Tool:``) and backticks
        around the tool name or params are tolerated.

        Returns:
            A list of dicts that may contain the keys ``tool``, ``params``
            (always a dict when present) and ``expected_help``.
        """
        calls = []
        current_call = {}

        for raw_line in response.split("\n"):
            line = raw_line.strip()

            if line == "---":
                if current_call:
                    calls.append(current_call)
                    current_call = {}
                continue

            # Drop bullet / bold markers the model sometimes adds before a label.
            line = line.lstrip("*- \t")

            if line.startswith("Tool:"):
                if current_call:
                    calls.append(current_call)
                tool_name = self._field_value(line, "Tool:").strip("*`").strip()
                current_call = {"tool": tool_name}
            elif line.startswith("Params:"):
                params_str = self._field_value(line, "Params:").strip("`").strip()
                current_call["params"] = self._parse_params(params_str)
            elif line.startswith("Expected Help:"):
                current_call["expected_help"] = self._field_value(line, "Expected Help:")

        if current_call:
            calls.append(current_call)

        return calls

    def execute_tool(self, tool_name, params):
        """Execute a tool and return the outcome as a dict.

        Never raises for tool errors: any exception from the tool (including
        a bad keyword argument) is reported in the ``error`` field.

        Returns:
            ``{"success": bool, "result": <tool return or None>,
            "error": <message or None>}``.
        """
        try:
            if tool_name not in self.tools:
                return {"success": False, "result": None, "error": "Tool not found"}

            result = self.tools[tool_name]["func"](**params)
            return {"success": True, "result": result, "error": None}
        except Exception as e:
            return {"success": False, "result": None, "error": str(e)}

    @classmethod
    def _extract_score(cls, response):
        """Pull the 0-10 usefulness rating out of the evaluator's reply.

        Taking the first integer in the reply is wrong whenever the model
        echoes the "(0-10)" scale or answers the numbered questions first, so
        the lookup goes from most to least explicit:

        1. an ``n/10`` rating,
        2. a number right after a ``usefulness``/``score``/``rating`` label,
        3. the first number left once list markers and the echoed scale are
           removed.

        Returns:
            The rating as a float clamped to ``[0, 10]``, or the neutral
            default (5) when the reply contains no usable number.
        """
        def clamp(value):
            return min(max(value, 0.0), 10.0)

        for pattern in (cls._SCORE_OUT_OF_TEN, cls._SCORE_LABELLED):
            match = pattern.search(response)
            if match:
                return clamp(float(match.group(1)))

        for line in response.split("\n"):
            cleaned = cls._LIST_MARKER.sub("", line, count=1)
            cleaned = cls._SCALE_ECHO.sub("", cleaned)
            match = cls._NUMBER.search(cleaned)
            if match:
                return clamp(float(match.group()))

        return cls._DEFAULT_SCORE

    def evaluate_usefulness(self, query, tool_name, tool_result, original_answer, enhanced_answer):
        """Ask the model whether the tool improved the answer.

        Returns:
            A dict with ``score`` (0-1), ``reasoning`` (the raw model reply)
            and ``improved`` (True when the 0-10 rating is at least 7).
        """
        prompt = f"""Evaluate if the tool improved the answer:

        Query: {query}
        Tool Used: {tool_name}
        Tool Result: {tool_result}

        Answer WITHOUT tool: {original_answer}
        Answer WITH tool: {enhanced_answer}

        Did the tool make the answer:
        1. More accurate?
        2. More informative?
        3. More helpful?

        Rate usefulness (0-10):
        Reasoning:"""

        response = self._generate_text(prompt)
        score = self._extract_score(response)

        return {
            "score": score / 10.0,  # Normalize to 0-1
            "reasoning": response,
            "improved": score >= self._IMPROVED_THRESHOLD
        }

    def answer_without_tools(self, query):
        """Generate the baseline answer without tools."""
        prompt = f"Answer this query briefly without using any tools: {query}"
        return self._generate_text(prompt).strip()

    def answer_with_tool(self, query, tool_name, tool_result):
        """Generate an answer that incorporates the given tool result."""
        prompt = f"""Answer this query using the tool result:

        Query: {query}
        Tool Used: {tool_name}
        Tool Result: {tool_result}

        Provide an enhanced answer incorporating the tool result:"""

        return self._generate_text(prompt).strip()

    def learn_from_execution(self, query, tool_name, params, execution_result, evaluation):
        """Update tool statistics and history from one execution outcome.

        A run counts as a success only when the tool executed without error
        AND the evaluation marked the answer as improved; anything else counts
        as a failure and decays the tool's usefulness score.

        Args:
            query: The user query the tool was tried on.
            tool_name: Name of a registered tool.
            params: Keyword arguments the tool was called with.
            execution_result: Dict as returned by ``execute_tool``.
            evaluation: Dict with at least ``score`` and ``improved``.

        Raises:
            KeyError: If ``tool_name`` is not registered.
        """
        tool_info = self.tools[tool_name]

        if execution_result["success"] and evaluation["improved"]:
            # Tool was useful: move the running score 10% toward this rating.
            tool_info["success_count"] += 1
            tool_info["usefulness_score"] = (
                tool_info["usefulness_score"] * 0.9 + evaluation["score"] * 0.1
            )

            # Store successful pattern
            self.learned_patterns.append({
                "query": query,
                "tool": tool_name,
                "params": params,
                "score": evaluation["score"],
                "timestamp": datetime.now().isoformat()
            })

            print(f"✅ Learned: {tool_name} is useful for queries like '{query}'\n")

        else:
            # Tool wasn't helpful: decay the running score by 5%.
            tool_info["fail_count"] += 1
            tool_info["usefulness_score"] = tool_info["usefulness_score"] * 0.95
            print(f"❌ Learned: {tool_name} not useful for this query\n")

        # Store usage history
        self.tool_usage_history.append({
            "query": query,
            "tool": tool_name,
            "params": params,
            "success": execution_result["success"],
            "useful": evaluation["improved"],
            "score": evaluation["score"],
            "timestamp": datetime.now().isoformat()
        })

    def _get_success_rate(self, tool_name):
        """Return the tool's record as text for use in prompts.

        Returns:
            ``"untested"`` when the tool has no recorded uses, otherwise
            ``"<successes>/<total> (<usefulness score>)"``.
        """
        info = self.tools[tool_name]
        total = info["success_count"] + info["fail_count"]
        if total == 0:
            return "untested"
        return f"{info['success_count']}/{total} ({info['usefulness_score']:.2f})"

    def run(self, query, auto_learn=True):
        """Answer ``query``, trying tools and learning from the outcome.

        Args:
            query: The user question.
            auto_learn: When True, every execution (including ones that
                crash) updates the tool statistics and usage history.

        Returns:
            The tool-assisted answer with the highest evaluation score, or
            the baseline answer when no tool produced a score above 0.
        """
        print(f"\n{'='*70}")
        print(f"🎯 Query: {query}")
        print(f"{'='*70}\n")

        # Step 1: Generate baseline answer without tools
        print("📝 Step 1: Generating baseline answer (no tools)...")
        baseline_answer = self.answer_without_tools(query)
        print(f"Baseline: {baseline_answer}\n")

        # Step 2: Generate potential tool calls
        print("🔧 Step 2: Generating potential tool calls...")
        potential_calls = self.generate_potential_calls(query)
        print(f"Generated {len(potential_calls)} potential tool calls:\n")

        for i, call in enumerate(potential_calls, 1):
            print(f"  {i}. {call.get('tool', 'unknown')}({call.get('params', {})})")
            print(f"     Why: {call.get('expected_help', 'N/A')}\n")

        best_answer = baseline_answer
        best_score = 0
        best_tool = None

        # Step 3 & 4: Execute each tool and observe results
        for i, call in enumerate(potential_calls, 1):
            tool_name = call.get("tool")
            params = call.get("params", {})

            # Skip calls that name no tool or a tool that was never registered.
            if not tool_name or tool_name not in self.tools:
                continue

            print(f"⚡ Step 3.{i}: Executing {tool_name}...")
            execution_result = self.execute_tool(tool_name, params)

            if not execution_result["success"]:
                print(f"   ❌ Execution failed: {execution_result['error']}\n")
                if auto_learn:
                    # Record the crash as a failure; otherwise the success
                    # rate shown to the model only ever reflects clean runs.
                    failed_evaluation = {
                        "score": 0.0,
                        "reasoning": f"Execution failed: {execution_result['error']}",
                        "improved": False,
                    }
                    self.learn_from_execution(
                        query, tool_name, params, execution_result, failed_evaluation
                    )
                continue

            print(f"   ✅ Result: {execution_result['result']}\n")

            # Generate enhanced answer
            print(f"📊 Step 4.{i}: Evaluating usefulness...")
            enhanced_answer = self.answer_with_tool(query, tool_name, execution_result["result"])

            # Evaluate if tool improved answer
            evaluation = self.evaluate_usefulness(
                query, tool_name, execution_result["result"],
                baseline_answer, enhanced_answer
            )

            print(f"   Score: {evaluation['score']:.2f}/1.0")
            print(f"   Improved: {'Yes' if evaluation['improved'] else 'No'}\n")

            # Step 5: Learn from execution
            if auto_learn:
                print(f"🧠 Step 5.{i}: Learning from execution...")
                self.learn_from_execution(query, tool_name, params, execution_result, evaluation)

            # Track best answer
            if evaluation["score"] > best_score:
                best_score = evaluation["score"]
                best_answer = enhanced_answer
                best_tool = tool_name

        # Return best answer
        print(f"{'='*70}")
        print(f"🏆 FINAL ANSWER")
        print(f"{'='*70}")
        if best_tool:
            print(f"Best tool: {best_tool} (score: {best_score:.2f})")
        print(f"\n{best_answer}\n")

        return best_answer

    def show_learned_knowledge(self):
        """Print per-tool statistics and the five highest-scoring patterns.

        Tools that have never been used are omitted from the statistics.
        """
        print(f"\n{'='*70}")
        print("🧠 LEARNED KNOWLEDGE")
        print(f"{'='*70}\n")

        print("Tool Performance:")
        for name, info in self.tools.items():
            total = info["success_count"] + info["fail_count"]
            if total > 0:
                success_rate = info["success_count"] / total * 100
                print(f"  {name}:")
                print(f"    - Uses: {total}")
                print(f"    - Success Rate: {success_rate:.1f}%")
                print(f"    - Usefulness Score: {info['usefulness_score']:.2f}")

        print(f"\nSuccessful Patterns Learned: {len(self.learned_patterns)}")
        if self.learned_patterns:
            print("\nTop patterns:")
            sorted_patterns = sorted(self.learned_patterns, key=lambda x: x["score"], reverse=True)
            for pattern in sorted_patterns[:5]:
                print(f"  - Query: '{pattern['query']}'")
                print(f"    Tool: {pattern['tool']}({pattern['params']})")
                print(f"    Score: {pattern['score']:.2f}\n")

In [10]:
# Define diverse tools
# Operators and functions the calculator is willing to evaluate. Anything not
# listed here (names, attribute access, other calls, ...) is rejected.
_CALC_BINARY_OPS = {
    ast.Add: operator.add,
    ast.Sub: operator.sub,
    ast.Mult: operator.mul,
    ast.Div: operator.truediv,
    ast.FloorDiv: operator.floordiv,
    ast.Mod: operator.mod,
    ast.Pow: operator.pow,
}
_CALC_UNARY_OPS = {ast.UAdd: operator.pos, ast.USub: operator.neg}
_CALC_FUNCTIONS = {"abs": abs, "round": round, "min": min, "max": max, "pow": pow}


def _evaluate_arithmetic(node):
    """Recursively evaluate a parsed arithmetic expression node.

    Raises:
        ValueError: If the node is not a number, an allow-listed operator, or
            a positional-argument call to an allow-listed function.
    """
    if isinstance(node, ast.Expression):
        return _evaluate_arithmetic(node.body)
    if isinstance(node, ast.Constant):
        # bool is a subclass of int; reject it so only real numbers pass.
        if isinstance(node.value, bool) or not isinstance(node.value, (int, float)):
            raise ValueError(f"Unsupported constant: {node.value!r}")
        return node.value
    if isinstance(node, ast.BinOp) and type(node.op) in _CALC_BINARY_OPS:
        left = _evaluate_arithmetic(node.left)
        right = _evaluate_arithmetic(node.right)
        return _CALC_BINARY_OPS[type(node.op)](left, right)
    if isinstance(node, ast.UnaryOp) and type(node.op) in _CALC_UNARY_OPS:
        return _CALC_UNARY_OPS[type(node.op)](_evaluate_arithmetic(node.operand))
    if (
        isinstance(node, ast.Call)
        and isinstance(node.func, ast.Name)
        and node.func.id in _CALC_FUNCTIONS
        and not node.keywords
    ):
        arguments = [_evaluate_arithmetic(arg) for arg in node.args]
        return _CALC_FUNCTIONS[node.func.id](*arguments)
    raise ValueError(f"Unsupported expression element: {type(node).__name__}")


def calculator(expression):
    """Perform mathematical calculations.

    SECURITY: ``expression`` is produced by the language model, so it is NOT
    passed to ``eval``. It is parsed with ``ast`` and only numbers, the
    operators ``+ - * / // % **``, unary ``+``/``-``, parentheses and calls
    to ``abs``/``round``/``min``/``max``/``pow`` are evaluated. Everything
    else is reported as a calculation error.

    Args:
        expression: Arithmetic expression as text, e.g. ``"458 * 23 + 1500"``.

    Returns:
        ``"Result: <value>"``.

    Raises:
        Exception: With the message ``"Calculation error: <cause>"`` when the
            expression cannot be parsed or evaluated; the cause is chained.
    """
    try:
        # ast.parse (unlike eval) rejects leading whitespace, so trim first.
        tree = ast.parse(expression.strip(), mode="eval")
        result = _evaluate_arithmetic(tree)
    except Exception as e:
        raise Exception(f"Calculation error: {e}") from e
    return f"Result: {result}"

def search_web(query):
    """Search for current information (simulated).

    Returns the canned result of the first topic (in the order listed below)
    that appears in ``query`` as a whole word, ignoring case. Whole-word
    matching keeps the short "ai" topic from firing on words that merely
    contain those letters, such as "maintain" or "email".

    Args:
        query: Free-text search query.

    Returns:
        The canned result for the matched topic, or a generic
        "Search results for ..." line when no topic matches.
    """
    # Simulated search results
    results = {
        "weather": "Current weather: 22°C, Sunny",
        "news": "Latest news: Tech conference announced for next month",
        "python": "Python 3.12 released with performance improvements",
        "ai": "New AI models showing improved reasoning capabilities",
    }

    # Tokenise once; punctuation such as "AI?" or "weather," is dropped.
    words = set(re.findall(r"[a-z0-9]+", query.lower()))

    for key, val in results.items():
        if key in words:
            return val

    return f"Search results for '{query}': General information available"

def calendar_check(date):
    """Look up the simulated schedule for a day name.

    The lookup ignores case; any day without an entry yields a
    "No events found" message that echoes the caller's original text.
    """
    agenda = {
        "today": "3 meetings scheduled: 10 AM, 2 PM, 4 PM",
        "tomorrow": "1 meeting: 11 AM team sync",
        "monday": "No meetings scheduled",
    }
    day_key = date.lower()

    if day_key in agenda:
        return agenda[day_key]
    return f"No events found for {date}"

def translator(text, target_lang):
    """Return a simulated translation of ``text`` into ``target_lang``.

    Known phrase/language pairs (matched case-insensitively) return the
    stored translation; everything else returns a placeholder string.
    """
    phrasebook = {
        "hello": {"spanish": "Hola", "french": "Bonjour", "german": "Hallo"},
        "goodbye": {"spanish": "Adiós", "french": "Au revoir", "german": "Auf Wiedersehen"},
    }

    by_language = phrasebook.get(text.lower())
    if by_language is not None:
        # The language is only normalised once the phrase itself is known.
        rendered = by_language.get(target_lang.lower())
        if rendered is not None:
            return rendered

    return f"Translation: {text} -> {target_lang} (simulated)"

def unit_converter(value, from_unit, to_unit):
    """Convert between units.

    Supports km <-> miles, kg <-> lbs and celsius <-> fahrenheit. Unit names
    are matched case-insensitively, surrounding whitespace is ignored, and
    common spellings ("kilometers", "pounds", "F", ...) are accepted so that
    a request like "100 kilometers to miles" is not rejected.

    Args:
        value: Number, or text convertible with ``float``.
        from_unit: Unit of ``value``.
        to_unit: Unit to convert into.

    Returns:
        ``"<value> <from_unit> = <result> <to_unit>"`` with the result
        rounded to two decimals and the unit names echoed as given.

    Raises:
        ValueError: If the unit pair is not supported or ``value`` is not
            numeric.
    """
    conversions = {
        ("km", "miles"): lambda x: x * 0.621371,
        ("miles", "km"): lambda x: x * 1.60934,
        ("kg", "lbs"): lambda x: x * 2.20462,
        ("lbs", "kg"): lambda x: x * 0.453592,
        ("celsius", "fahrenheit"): lambda x: x * 9/5 + 32,
        ("fahrenheit", "celsius"): lambda x: (x - 32) * 5/9,
    }

    # Alternative spellings mapped onto the canonical names used as keys above.
    aliases = {
        "kilometer": "km", "kilometers": "km",
        "kilometre": "km", "kilometres": "km",
        "mile": "miles", "mi": "miles",
        "kilogram": "kg", "kilograms": "kg",
        "lb": "lbs", "pound": "lbs", "pounds": "lbs",
        "c": "celsius", "°c": "celsius",
        "f": "fahrenheit", "°f": "fahrenheit",
    }

    def canonical(unit):
        name = unit.strip().lower()
        return aliases.get(name, name)

    key = (canonical(from_unit), canonical(to_unit))
    if key not in conversions:
        raise ValueError(f"Conversion from {from_unit} to {to_unit} not supported")

    result = conversions[key](float(value))
    return f"{value} {from_unit} = {result:.2f} {to_unit}"


In [None]:
# Usage Examples: register the demo tools, run six queries, then print what
# the agent learned along the way.
print("="*70)
print("Toolformer Agentic Pattern : Self-Teaching Tool Usage")
print("="*70)

agent = ToolformerAgent()

# Register tools
_demo_tools = [
    ("calculator", calculator, "Perform mathematical calculations"),
    ("search_web", search_web, "Search for current information"),
    ("calendar_check", calendar_check, "Check calendar and schedule"),
    ("translator", translator, "Translate text between languages"),
    ("unit_converter", unit_converter, "Convert between units"),
]
for _tool_name, _tool_func, _tool_desc in _demo_tools:
    agent.add_tool(_tool_name, _tool_func, _tool_desc)

# Each entry is (banner title, query); the comment states the expected lesson.
_demo_queries = [
    # Math query - should learn calculator is useful
    ("EXAMPLE 1: Math Query", "What is 458 multiplied by 23 plus 1500?"),
    # Current info - should learn search is useful
    ("EXAMPLE 2: Current Information Query", "What's the latest news about AI?"),
    # Scheduling - should learn calendar is useful
    ("EXAMPLE 3: Schedule Query", "Do I have any meetings tomorrow?"),
    # Translation - should learn translator is useful
    ("EXAMPLE 4: Translation Query", "How do you say hello in Spanish?"),
    # Unit conversion - should learn converter is useful
    ("EXAMPLE 5: Conversion Query", "Convert 100 kilometers to miles"),
    # Test learned knowledge - should now prefer successful tools
    ("EXAMPLE 6: Testing Learned Knowledge", "Calculate 789 divided by 3"),
]
for _banner_title, _demo_query in _demo_queries:
    print("\n" + "="*70)
    print(_banner_title)
    print("="*70)
    agent.run(_demo_query)

# Show what the agent learned
agent.show_learned_knowledge()