# Interactive Agent Demo

This notebook demonstrates the FunctionGemma Agent's capabilities:
- ReAct reasoning loop (Think-Act-Observe)
- Tool execution
- RAG (Retrieval-Augmented Generation)
- Request tracing and visualization

In [None]:
# Install required packages if running in Colab
# !pip install httpx matplotlib seaborn pandas

import asyncio
import json
import os
import time
from datetime import datetime
from typing import Any, Dict, List, Optional

import httpx
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
from IPython.display import HTML, display

# Configure plotting: apply the 'seaborn-v0_8' matplotlib style sheet, then
# switch seaborn's palette to "husl" for the figures drawn later on.
# NOTE(review): the 'seaborn-v0_8' style name presumably needs a recent
# matplotlib release — confirm against the installed version.
plt.style.use('seaborn-v0_8')
sns.set_palette("husl")

## 1. Setup API Client

In [None]:
class AgentClient:
    """Client for interacting with FunctionGemma Agent API.

    Wraps a single ``httpx.AsyncClient`` bound to ``base_url``. When an
    ``api_key`` is given, every request carries it as a Bearer token in the
    ``Authorization`` header. Call :meth:`close` when finished to release the
    underlying HTTP session.
    """

    def __init__(self, base_url: str = "http://localhost:8000", api_key: Optional[str] = None):
        """Create the client.

        Args:
            base_url: Root URL of the agent service.
            api_key: Optional API key; a falsy value means no auth header is sent.
        """
        self.base_url = base_url
        self.api_key = api_key
        self.session = httpx.AsyncClient(
            base_url=base_url,
            headers={"Authorization": f"Bearer {api_key}"} if api_key else None,
        )

    async def chat(self, prompt: str, session_id: Optional[str] = None) -> Dict[str, Any]:
        """Send a chat request to the agent.

        Args:
            prompt: User prompt to send.
            session_id: Optional conversation identifier, forwarded as-is
                (``None`` is sent as JSON ``null``).

        Returns:
            The decoded JSON response with an extra ``"total_time"`` key holding
            the client-side round-trip duration in seconds.

        Raises:
            httpx.HTTPStatusError: If the server answers with a 4xx/5xx status.
        """
        # perf_counter is monotonic, so the measured duration cannot go
        # negative or jump if the system clock is adjusted mid-request.
        start_time = time.perf_counter()

        response = await self.session.post(
            "/api/v1/chat",
            json={"prompt": prompt, "session_id": session_id},
        )
        response.raise_for_status()

        result = response.json()
        result["total_time"] = time.perf_counter() - start_time

        return result

    async def health(self) -> bool:
        """Check if the agent is healthy.

        Returns:
            ``True`` when the health endpoint answers with HTTP 200; ``False``
            on any other status or when the request itself fails.
        """
        try:
            response = await self.session.get("/api/v1/health")
        except Exception:
            # Best-effort probe: any request failure means "not healthy".
            # Deliberately not a bare ``except`` so task cancellation and
            # KeyboardInterrupt still propagate.
            return False
        return response.status_code == 200

    async def close(self):
        """Close the HTTP session."""
        await self.session.aclose()

# Initialize client
# Update these values for your setup
BASE_URL = "http://localhost:8000"  # or "https://your-agent.example.com"
API_KEY = "your-api-key-here"  # Get from .env or admin

client = AgentClient(BASE_URL, API_KEY)

# Test connection
if await client.health():
    print("‚úÖ Agent is healthy and ready!")
else:
    print("‚ùå Agent is not responding. Check if it's running.")

## 2. Basic Interaction

In [None]:
# Simple question
response = await client.chat("What is the capital of France?")

print("Prompt:", "What is the capital of France?")
print("\nResponse:", response["response"])
print("\nExecution time:", f"{response['execution_time_ms']:.2f}ms")
print("Total time:", f"{response['total_time']:.2f}s")

## 3. ReAct Reasoning Visualization

In [None]:
def visualize_reasoning_trace(response: Dict[str, Any]):
    """Visualize the ReAct reasoning trace.

    Draws one bar per entry of ``response["tool_calls"]`` (bar height is the
    entry's ``duration_ms``, bar color identifies its ``tool``), then prints
    each call's tool name, arguments, result (first 100 characters) and
    duration.

    Args:
        response: Agent response dict. Only the optional ``"tool_calls"`` list
            is read; within each entry the ``tool``, ``arguments``, ``result``
            and ``duration_ms`` keys are all optional.

    Returns:
        None. When there are no tool calls a short message is printed and no
        figure is created.
    """
    tool_calls = response.get("tool_calls") or []

    # Check before touching matplotlib so an empty response does not leave a
    # blank figure behind.
    if not tool_calls:
        print("No tool calls in this response.")
        return

    # Prepare data for visualization
    steps = [f"Step {i}" for i in range(1, len(tool_calls) + 1)]
    step_types = [call.get("tool", "unknown") for call in tool_calls]
    durations = [call.get("duration_ms", 0) for call in tool_calls]

    # dict.fromkeys de-duplicates while keeping first-seen order, so legend
    # order and tool colors are the same on every run (a set is unordered).
    unique_tools = list(dict.fromkeys(step_types))
    colors = plt.cm.Set3(range(len(unique_tools)))
    tool_colors = {tool: colors[i] for i, tool in enumerate(unique_tools)}

    # Plot the timeline
    fig, ax = plt.subplots(figsize=(12, 6))
    bars = ax.bar(steps, durations, color=[tool_colors[t] for t in step_types])

    # Customize the plot
    ax.set_title("ReAct Reasoning Timeline", fontsize=16, fontweight="bold")
    ax.set_xlabel("Reasoning Steps", fontsize=12)
    ax.set_ylabel("Duration (ms)", fontsize=12)

    # Add legend (proxy rectangles: one per distinct tool)
    legend_elements = [
        plt.Rectangle((0, 0), 1, 1, facecolor=tool_colors[tool], label=tool)
        for tool in unique_tools
    ]
    ax.legend(handles=legend_elements, loc="upper right")

    # Add value labels on bars
    for bar in bars:
        height = bar.get_height()
        ax.text(bar.get_x() + bar.get_width() / 2., height,
                f'{height:.0f}ms',
                ha='center', va='bottom')

    plt.tight_layout()
    plt.show()

    # Print detailed trace
    print("\nüìã Detailed Reasoning Trace:")
    print("=" * 50)
    for i, call in enumerate(tool_calls, 1):
        # Results may be missing or non-string (dict, list, ...); stringify
        # before slicing so the preview never raises.
        result_preview = str(call.get('result', ''))[:100]
        print(f"\nStep {i}: {call.get('tool', 'unknown')}")
        print(f"  Arguments: {call.get('arguments', {})}")
        print(f"  Result: {result_preview}...")
        print(f"  Duration: {call.get('duration_ms', 0):.2f}ms")

# Test with a complex query that requires tools
# The reply overwrites the notebook-level `response` variable, which is then
# both printed and handed to the trace visualizer below.
complex_query = "Check the status of all pods in the kubernetes cluster and tell me if any are failing"
response = await client.chat(complex_query)

# Show the agent's final answer text
print("\nü§ñ Agent Response:")
print(response["response"])

# Visualize the reasoning: bar chart plus printed per-call details of
# response["tool_calls"] (prints a notice instead when there are none)
visualize_reasoning_trace(response)

## 4. RAG Demo - Knowledge Retrieval

In [None]:
def demonstrate_rag(query: str):
    """Demonstrate RAG retrieval with context highlighting."""
    
    # Send a query that requires knowledge retrieval
    response = await client.chat(query)
    
    print("\nüìö RAG Demonstration")
    print("=" * 50)
    print(f"Query: {query}")
    print(f"\nResponse: {response['response']}")
    
    # Check if RAG was used (this would require the API to return retrieval info)
    # For demo purposes, we'll simulate the retrieved context
    
    # Simulated retrieved chunks
    retrieved_chunks = [
        {
            "content": "Kubernetes pods are the smallest deployable units in Kubernetes. They can have one or more containers.",
            "score": 0.95
        },
        {
            "content": "Pod status can be Running, Pending, Succeeded, Failed, or Unknown. Failed pods need investigation.",
            "score": 0.89
        },
        {
            "content": "Use 'kubectl get pods' to check pod status. Look for pods with status other than Running.",
            "score": 0.87
        }
    ]
    
    print("\nüìÑ Retrieved Context:")
    print("-" * 50)
    
    for i, chunk in enumerate(retrieved_chunks, 1):
        print(f"\nChunk {i} (Score: {chunk['score']:.2f}):")
        print(f"  {chunk['content']}")
    
    # Visualize retrieval scores
    fig, ax = plt.subplots(figsize=(10, 4))
    
    chunk_ids = [f"Chunk {i}" for i in range(1, len(retrieved_chunks) + 1)]
    scores = [c["score"] for c in retrieved_chunks]
    
    bars = ax.bar(chunk_ids, scores, color="skyblue")
    ax.set_title("RAG Retrieval Similarity Scores", fontsize=14, fontweight="bold")
    ax.set_ylabel("Similarity Score", fontsize=12)
    ax.set_ylim(0, 1)
    
    # Add value labels
    for bar in bars:
        height = bar.get_height()
        ax.text(bar.get_x() + bar.get_width()/2., height + 0.01,
               f'{height:.2f}',
               ha='center', va='bottom')
    
    plt.tight_layout()
    plt.show()

# Test RAG with a domain-specific question
rag_query = "How do I check if my Kubernetes application is healthy?"
demonstrate_rag(rag_query)

## 5. Multi-Turn Conversation

In [None]:
async def multi_turn_conversation():
    """Run a scripted three-prompt conversation under one shared session id.

    A timestamped session id is generated once and passed with every request
    to the notebook-level ``client``. Each turn prints the user prompt, the
    agent's reply and the reported execution time, then pauses for half a
    second before the next turn.
    """
    stamp = datetime.now().strftime('%Y%m%d-%H%M%S')
    session_id = f"demo-session-{stamp}"

    prompts = (
        "Create a Python class called Calculator with add and subtract methods",
        "Now add a multiply method to the Calculator class",
        "Show me how to use the Calculator class to compute (5 + 3) * 2",
    )

    print("\nüí¨ Multi-Turn Conversation")
    print("=" * 50)

    turn = 0
    for user_prompt in prompts:
        turn += 1
        print(f"\nTurn {turn}:")
        print(f"User: {user_prompt}")

        reply = await client.chat(user_prompt, session_id=session_id)
        print(f"Agent: {reply['response']}")
        print(f"(Execution time: {reply['execution_time_ms']:.2f}ms)")

        # Short pause between turns so the output is easier to follow
        await asyncio.sleep(0.5)

# Run multi-turn conversation
# (a top-level `await`, so this cell relies on the notebook kernel's support
# for awaiting coroutines directly in a cell)
await multi_turn_conversation()

## 6. Performance Analysis

In [None]:
async def benchmark_performance(queries: List[str], num_runs: int = 3):
    """Benchmark agent performance on multiple queries.

    Each query is sent ``num_runs`` times through the notebook-level
    ``client``. The reported ``execution_time_ms`` and the number of entries in
    ``tool_calls`` are averaged per query, printed, and plotted as two bar
    charts (average time with min/max error bars, and average tool count).

    Args:
        queries: Prompts to benchmark. May be empty.
        num_runs: How many times each query is sent; must be at least 1.

    Returns:
        A ``pandas.DataFrame`` with one row per query and the columns
        ``query`` (truncated to 50 characters), ``avg_time_ms``, ``avg_tools``,
        ``min_time_ms`` and ``max_time_ms``. For empty ``queries`` the frame
        is empty (same columns) and nothing is plotted.

    Raises:
        ValueError: If ``num_runs`` is less than 1.
    """
    # Fail early with a clear message instead of a ZeroDivisionError when
    # averaging over zero runs.
    if num_runs < 1:
        raise ValueError(f"num_runs must be at least 1, got {num_runs}")

    columns = ["query", "avg_time_ms", "avg_tools", "min_time_ms", "max_time_ms"]
    queries = list(queries)

    print(f"\n‚ö° Performance Benchmark ({num_runs} runs each)")
    print("=" * 50)

    # Nothing to measure: return an empty frame that still has the expected
    # columns so downstream column lookups keep working.
    if not queries:
        return pd.DataFrame(columns=columns)

    results = []

    for query in queries:
        times = []
        tool_counts = []

        for _ in range(num_runs):
            response = await client.chat(query)
            times.append(response["execution_time_ms"])
            tool_counts.append(len(response.get("tool_calls", [])))

        avg_time = sum(times) / len(times)
        avg_tools = sum(tool_counts) / len(tool_counts)

        # One label for both the table and the printout; the ellipsis is only
        # added when the query was actually truncated.
        label = query[:50] + "..." if len(query) > 50 else query

        results.append({
            "query": label,
            "avg_time_ms": avg_time,
            "avg_tools": avg_tools,
            "min_time_ms": min(times),
            "max_time_ms": max(times)
        })

        print(f"\nQuery: {label}")
        print(f"  Avg time: {avg_time:.2f}ms (min: {min(times):.2f}, max: {max(times):.2f})")
        print(f"  Avg tools used: {avg_tools:.1f}")

    # Create performance visualization
    df = pd.DataFrame(results, columns=columns)
    positions = range(len(df))
    tick_labels = [f"Q{i+1}" for i in positions]

    fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(15, 5))

    # Execution time plot; asymmetric error bars span min..max around the mean
    ax1.bar(positions, df["avg_time_ms"], yerr=[
        df["avg_time_ms"] - df["min_time_ms"],
        df["max_time_ms"] - df["avg_time_ms"]
    ], capsize=5, color="lightcoral")
    ax1.set_title("Execution Time by Query", fontweight="bold")
    ax1.set_ylabel("Time (ms)")
    ax1.set_xticks(positions)
    ax1.set_xticklabels(tick_labels, rotation=45)

    # Tool usage plot
    ax2.bar(positions, df["avg_tools"], color="lightblue")
    ax2.set_title("Average Tools Used", fontweight="bold")
    ax2.set_ylabel("Number of Tools")
    ax2.set_xticks(positions)
    ax2.set_xticklabels(tick_labels, rotation=45)

    plt.tight_layout()
    plt.show()

    return df

# Define test queries
test_queries = [
    "What is 2 + 2?",
    "Check the status of Kubernetes pods",
    "Write a Python function to fetch data from an API",
    "Explain microservices architecture",
]

# Run benchmark
performance_df = benchmark_performance(test_queries)

# Display summary statistics
print("\nüìä Performance Summary:")
print(f"Average execution time: {performance_df['avg_time_ms'].mean():.2f}ms")
print(f"Fastest query: {performance_df['avg_time_ms'].min():.2f}ms")
print(f"Slowest query: {performance_df['avg_time_ms'].max():.2f}ms")

## 7. Error Handling and Edge Cases

In [None]:
async def test_edge_cases():
    """Send a handful of unusual prompts to the agent and report each outcome.

    Every scenario is sent through the notebook-level ``client``. A scenario
    that completes prints the response length and execution time; one that
    raises prints the error message instead, and the loop moves on to the next
    scenario.
    """
    # Scenario description -> prompt text, visited in insertion order
    scenarios = {
        "Empty prompt": "",
        "Very long prompt": "A" * 10000,
        "Emojis only": "üöÄüåüüí´",
        "Malformed request in prompt": "invalid_json{{{",
    }

    print("\nüß™ Edge Case Testing")
    print("=" * 50)

    for label, text in scenarios.items():
        try:
            reply = await client.chat(text)
            print(f"\n‚úÖ {label}: SUCCESS")
            print(f"   Response length: {len(reply['response'])} chars")
            print(f"   Execution time: {reply['execution_time_ms']:.2f}ms")
        except Exception as err:
            print(f"\n‚ùå {label}: FAILED")
            print(f"   Error: {err!s}")

# Test edge cases
# (failures are caught and printed inside test_edge_cases, one scenario at a time)
await test_edge_cases()

## 8. Clean Up

In [None]:
# Close the client session
await client.close()
print("\n‚úÖ Demo completed successfully!")
print("\nKey takeaways:")
print("- Agent uses ReAct reasoning for complex tasks")
print("- Tools are executed based on reasoning")
print("- RAG provides relevant context")
print("- Performance varies with query complexity")
print("- System handles edge cases gracefully")