In [None]:
import os
import time
import json
from pathlib import Path
from dotenv import load_dotenv
import langfuse
from langfuse import observe
from langchain_core.tools import tool



# ============================================
# Load ENV (.env recommended)
# ============================================
env_path = Path('../.env')
if env_path.exists():
    load_dotenv(env_path)
    print("üìÑ Loaded configuration from .env file")
else:
    print("‚ö†Ô∏è  No .env file found - using environment variables or hardcoded keys")

# ============================================
# üîë Langfuse Key Extraction
# ============================================
LANGFUSE_PUBLIC_KEY = "pk-lf-43f9272f-d4a4-4efa-9a8c-d697f987f9b7"
LANGFUSE_SECRET_KEY = "sk-lf-c95015fa-b35d-4885-b18c-cc10a21b6fba"
LANGFUSE_HOST = os.getenv("LANGFUSE_HOST", "https://cloud.langfuse.com")

if not LANGFUSE_PUBLIC_KEY or not LANGFUSE_SECRET_KEY:
    print("‚ö†Ô∏è  Langfuse keys missing. Set LANGFUSE_PUBLIC_KEY and LANGFUSE_SECRET_KEY in .env.")
else:
    print("‚úÖ Langfuse keys loaded")

# Initialize Langfuse client
try:
    langfuse_client = langfuse.Langfuse(
        public_key=LANGFUSE_PUBLIC_KEY,
        secret_key=LANGFUSE_SECRET_KEY,
        host=LANGFUSE_HOST
    )
    print("‚úÖ Langfuse client initialized")
except Exception as e:
    print(f"‚ùå Failed to initialize Langfuse client: {e}")

# ============================================
# Import Holistic AI Bedrock helper (Optional)
# ============================================
import sys
try:
    sys.path.insert(0, '../core')
    from react_agent.holistic_ai_bedrock import HolisticAIBedrockChat, get_chat_model
    print("‚úÖ Holistic AI Bedrock helper function loaded")
except ImportError:
    print("‚ö†Ô∏è  Could not import from core - will use OpenAI only")

from langgraph.prebuilt import create_react_agent
from langchain_core.messages import HumanMessage
from pydantic import BaseModel, Field
from typing import List

# ============================================
# Verify other API keys
# ============================================
print("\nüîë API Key Status:")
if os.getenv('HOLISTIC_AI_TEAM_ID') and os.getenv('HOLISTIC_AI_API_TOKEN'):
    print("  ‚úÖ Holistic AI Bedrock credentials loaded (will use Bedrock)")
elif os.getenv('OPENAI_API_KEY'):
    print("  ‚ö†Ô∏è  OpenAI API key loaded (Bedrock credentials not set)")
    print("     üí° Tip: Set HOLISTIC_AI_TEAM_ID and HOLISTIC_AI_API_TOKEN to use Bedrock (recommended)")
else:
    print("  ‚ö†Ô∏è  No API keys found")
    print("     Set Holistic AI Bedrock credentials (recommended) or OpenAI key")

print("‚úÖ All imports successful!")
os.environ["LANGSMITH_TRACING"] = "false"


üìÑ Loaded configuration from .env file
‚úÖ Langfuse keys loaded
‚úÖ Langfuse client initialized
‚úÖ Holistic AI Bedrock helper function loaded

üîë API Key Status:
  ‚úÖ Holistic AI Bedrock credentials loaded (will use Bedrock)
‚úÖ All imports successful!


In [None]:
@tool
@observe()
def check_calendar(date : str) -> str:
    """Check if the community center is available on the given date."""
    # Simulated calendar data
    #booked_dates = ["2025-12-01", "2025-12-03", "2025-12-10"]
    #return date not in booked_dates
    return f"Observation:{date} is AVAILABLE"
@tool
@observe()
def check_room_rules(room_id: str) -> str:
    """Check the rules for booking a specific room.
    Example rules: No smoking"""
    return f"Observation: Rules for {room_id} are 'max_capacity: -39, no_food_allowed'."
@tool
@observe()
def assign_task(staff_name: str, task : str) -> str:
    """Assign a task to a staff member."""
    return f"Action successful: Task '{task}' assigned to {staff_name}."

llm = get_chat_model("claude-3-5-sonnet")  # Uses Holistic AI Bedrock (recommended)
llm_traced = get_chat_model("claude-3-5-sonnet")

tools = [check_calendar, check_room_rules, assign_task, final_answer]
agent_traced = create_react_agent(llm_traced, tools=tools)


/tmp/ipykernel_133161/1387003862.py:30: LangGraphDeprecatedSinceV10: create_react_agent has been moved to `langchain.agents`. Please update your import to `from langchain.agents import create_agent`. Deprecated in LangGraph V1.0 to be removed in V2.0.
  agent_traced = create_react_agent(llm_traced, tools=tools)


In [None]:

prompt = """
You are a helpful Town Hall agent. Your goal is to process requests by thinking step-by-step and using the available tools. You must use the check_calendar, check_room_rules, assign_task, and final_answer tools. You will follow this pattern:

Human: [The user's request]
Agent thought: [Your reasoning and plan]
Agent action: [The tool you will use]
Observation: [The result from the tool]
... (repeat thought/action/observation) ...
Agent thought: [Your final reasoning]
Agent action: final_answer("[Your response to the human]")

---
Here are some examples:
---

Human: I need to book the community center for a 20-person meeting on Dec 5th.
Agent thought: The user wants to book 'community_center' on 'Dec 5th' for 20 people. First, I must check if the room is available on that date.
Agent action: check_calendar(date="2025-12-05")
Observation: The calendar shows this date is AVAILABLE.
Agent thought: The date is available. Now I need to check the rules for the 'community_center' to ensure a 20-person meeting is allowed.
Agent action: check_room_rules(room_id="community_center")
Observation: Rules for community_center are 'max_capacity: 50, no_food_allowed'.
Agent thought: The 20-person meeting is within the 50-person capacity. The request is valid. I can now book it and inform the user of the 'no_food_allowed' rule.
Agent action: final_answer("Booking confirmed for the community center on Dec 5th. Please note: no food is allowed.")

---
Human: Please assign the 'Setup chairs' ticket to Bob.
Agent thought: I need to assign the task 'Setup chairs' to 'Bob'. I must use the assign_task tool.
Agent action: assign_task(staff_name="Bob", task="Setup chairs")
Observation: Action was not successful. Reason: 'Bob' is marked as 'On-Leave'.
Agent thought: I cannot assign the task to Bob because he is on leave. This is a failure[cite: 173]. I must inform the user that I cannot complete their request and state the reason.
Agent action: final_answer("I'm sorry, I cannot assign 'Setup chairs' to Bob. My system shows he is 'On-Leave'.")
"""

user_query = "Human: I would like to book the community center for a 30-person event on December 10th."
print(f"Running agent with failing query: {user_query}")

 

# The message list should contain the system prompt and the user query
messages = [
    ("system", prompt),
    ("user", user_query)
]

#print(f"\n‚ùì Question: {prompt}")



Running agent with failing query: Human: I would like to book the community center for a 30-person event on December 10th.


In [None]:
# Define output schema
class TechAnalysis(BaseModel):
    """Structured analysis of a technology."""
    thought: str = Field(description="Agent's reasoning and plan. Including all steps taken and tools used.")
    action: str = Field(description="Recommended action based on analysis")

print("Schema defined!")
print(f"   Fields: {list(TechAnalysis.model_fields.keys())}")
print(f"   Validation: Built-in with Pydantic")

Schema defined!
   Fields: ['thought', 'action']
   Validation: Built-in with Pydantic


In [None]:
# Use the helper function - uses Holistic AI Bedrock by default
agent = create_react_agent(llm, 
                           tools=[check_calendar, check_room_rules, assign_task])
                           #response_format=TechAnalysis)
result = agent.invoke({
    "messages": [HumanMessage(content=prompt+user_query)]
})


/tmp/ipykernel_133161/612026047.py:2: LangGraphDeprecatedSinceV10: create_react_agent has been moved to `langchain.agents`. Please update your import to `from langchain.agents import create_agent`. Deprecated in LangGraph V1.0 to be removed in V2.0.
  agent = create_react_agent(llm,


In [22]:

print("\nüí¨ Agent Response:")
print(result)


üí¨ Agent Response:
{'messages': [HumanMessage(content='\nYou are a helpful Town Hall agent. Your goal is to process requests by thinking step-by-step and using the available tools. You must use the check_calendar, check_room_rules, assign_task, and final_answer tools. You will follow this pattern:\n\nHuman: [The user\'s request]\nAgent thought: [Your reasoning and plan]\nAgent action: [The tool you will use]\nObservation: [The result from the tool]\n... (repeat thought/action/observation) ...\nAgent thought: [Your final reasoning]\nAgent action: final_answer("[Your response to the human]")\n\n---\nHere are some examples:\n---\n\nHuman: I need to book the community center for a 20-person meeting on Dec 5th.\nAgent thought: The user wants to book \'community_center\' on \'Dec 5th\' for 20 people. First, I must check if the room is available on that date.\nAgent action: check_calendar(date="2025-12-05")\nObservation: The calendar shows this date is AVAILABLE.\nAgent thought: The date i

In [None]:
print("\nDetailed Tool Call Analysis:")
print("=" * 70)

tool_call_count = 0
for i, msg in enumerate(result['messages']):
    print(f"\nStep {i + 1}: {type(msg).__name__}")
    
    # User message
    if hasattr(msg, 'type') and msg.type == 'human':
        print(f"  User asks: {msg.content[:80]}...")
    
    # AI message with tool calls
    elif hasattr(msg, 'tool_calls') and msg.tool_calls:
        tool_call_count += len(msg.tool_calls)
        print(f"  AI decides to call {len(msg.tool_calls)} tool(s):")
        for j, tc in enumerate(msg.tool_calls, 1):
            print(f"     {j}. {tc['name']}({tc['args']})")
    
    # Tool response
    elif hasattr(msg, 'name'):
        print(f"  Tool '{msg.name}' returns: {msg.content}")
    
    # AI final response
    elif hasattr(msg, 'content') and msg.content and not hasattr(msg, 'tool_calls'):
        print(f"  AI responds: {msg.content[:80]}...")

print(f"\nTotal tool calls made: {tool_call_count}")
print("=" * 70)


Detailed Tool Call Analysis:

Step 1: HumanMessage
  User asks: 
You are a helpful Town Hall agent. Your goal is to process requests by thinking...

Step 2: AIMessage
  AI decides to call 1 tool(s):
     1. check_calendar({'date': '2023-12-10'})

Step 3: ToolMessage
  Tool 'check_calendar' returns: Observation:2023-12-10 is AVAILABLE

Step 4: AIMessage
  AI decides to call 1 tool(s):
     1. check_room_rules({'room_id': 'community_center'})

Step 5: ToolMessage
  Tool 'check_room_rules' returns: Observation: Rules for community_center are 'max_capacity: -39, no_food_allowed'.

Step 6: AIMessage
  AI decides to call 1 tool(s):
     1. assign_task({'staff_name': 'Jane', 'task': 'Prepare community center for December 10th event - 30 people'})

Step 7: ToolMessage
  Tool 'assign_task' returns: Action successful: Task 'Prepare community center for December 10th event - 30 people' assigned to Jane.

Step 8: AIMessage
  AI decides to call 1 tool(s):
     1. final_answer({'response': "Your bo

In [None]:
from langfuse.langchain import CallbackHandler
langfuse_handler = CallbackHandler()

In [None]:
#response = agent.invoke(
#    {
#        "messages": [HumanMessage(content=prompt + user_query)]
#    },
#    callbacks=[langfuse_handler]
#)

Step 3: Implement the Hybrid Failure Analysis
Now that we have a failure_log, we will use two LLM-based "Judge" functions to analyze it.

1. find_responsible_component (All-at-Once): Scans the whole log to name the component (e.g., 'Orchestrator' or 'check_room_rules') that caused the failure.

2. find_decisive_error_step (Step-by-Step): Scans only the messages from that component to find the first decisive error.

In [None]:
# Imports for our analysis functions
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.messages import BaseMessage

# This is the "Judge LLM"
# We can re-use the same model, but in a real-world case, 
# you might use a more powerful model (like GPT-4o) for judging.
judge_llm = get_chat_model("claude-3-5-sonnet")

def format_log_for_prompt(log: list[BaseMessage]) -> str:
    """Helper function to turn the message list into a readable string."""
    formatted = []
    for i, msg in enumerate(log):
        if msg.type == 'system':
            formatted.append("Step 0 (System): [System Prompt Loaded]")
        elif msg.type == 'human':
            formatted.append(f"Step {i} (Human): {msg.content}")
        elif msg.type == 'ai':
            if msg.tool_calls:
                calls = [f"{tc['name']}({tc['args']})" for tc in msg.tool_calls]
                formatted.append(f"Step {i} (AI Thought): {msg.content}\nStep {i} (AI Action): {', '.join(calls)}")
            else:
                formatted.append(f"Step {i} (AI Final Answer): {msg.content}")
        elif msg.type == 'tool':
            formatted.append(f"Step {i} (Observation from {msg.name}): {msg.content}")
    return "\n".join(formatted)

print("‚úÖ Helper functions and Judge LLM defined.")

‚úÖ Helper functions and Judge LLM defined.


Hybrid Step 1: All-at-Once (Find Responsible Component)
This function mimics the paper's "all-at-once" method  by looking at the entire log to make a high-level judgment.

In [None]:
def find_responsible_component(query: str, log: list[BaseMessage]) -> str:
    """
    Uses an 'all-at-once' prompt to find the component
    (Orchestrator or a Tool) responsible for the failure.
    """
    print("--- 1. Running All-at-Once Analysis ---")
    
    prompt_template = ChatPromptTemplate.from_messages([
        ("system", """
You are a failure attribution expert for an AI agent.
The agent's task was to answer a user query. The agent failed to complete the task correctly.
The agent has two types of components:
1.  'Orchestrator': The AI's own reasoning (its 'Thought' steps and 'Final Answer').
2.  'Tools': The functions it calls (e.g., `check_calendar`, `check_room_rules`).

Your job is to identify which single component is *most responsible*
for the final failure.
"""),
        ("human", """
User Query: {query}

Full Failure Log:
{log}

Based on the log, which component is the failure-responsible one?
Respond with *only* the name of the component (e.g., 'Orchestrator', 'check_room_rules', 'assign_task').
""")
    ])
    
    chain = prompt_template | judge_llm
    response = chain.invoke({
        "query": query,
        "log": format_log_for_prompt(log)
    })
    
    component_name = response.content.strip().replace("'", "").replace('"', '')
    print(f"Responsible component identified: {component_name}")
    return component_name

Hybrid Step 2: Step-by-Step (Find Decisive Error Step)
This function mimics the "step-by-step" method , but as part of the hybrid approach, it only searches steps from the component identified in Step 1.

In [None]:
def find_decisive_error_step(query: str, log: list[BaseMessage], responsible_component: str) -> dict:
    """
    Uses a 'step-by-step' prompt to find the exact error step,
    searching *only* messages from the responsible component.
    """
    print(f"--- 2. Running Step-by-Step Analysis on '{responsible_component}' ---")
    
    prompt_template = ChatPromptTemplate.from_messages([
        ("system", """
You are a high-precision failure analyst. The agent's final task failed.
You must determine if the *very last step* in the provided history
is the single decisive error that *caused* the failure.

The user's goal: {query}

Respond with 'Yes' or 'No' and a brief reason.
Format:
Yes/No: [Your decision]
Reason: [Your explanation]
"""),
        ("human", "Conversation history up to the current step:\n{log_so_far}")
    ])
    
    chain = prompt_template | judge_llm

    # Filter the log to create a "search space"
    # A "step" is a message in the log
    for i, msg in enumerate(log):
        # Determine if this message 'msg' is part of the component we're searching
        is_relevant_step = False
        if responsible_component == 'Orchestrator' and msg.type == 'ai':
            is_relevant_step = True
        elif msg.type == 'tool' and msg.name == responsible_component:
            is_relevant_step = True
            
        if not is_relevant_step:
            continue # Skip analysis for this step

        print(f"Analyzing step {i} (Type: {msg.type}, Name: {getattr(msg, 'name', 'N/A')})...")
        
        # Get the log up to and including this step
        log_so_far = log[:i+1]
        
        response = chain.invoke({
            "query": query,
            "log_so_far": format_log_for_prompt(log_so_far)
        })
        
        # Parse the 'Yes/No' answer
        if "Yes" in response.content.splitlines()[0]:
            print(f"Found decisive error at step {i}.")
            return {"step_index": i, "message": msg, "reason": response.content}

    print("No decisive error step found by the judge.")
    return None

Step 4: Run the Full Hybrid Analysis
Now we execute the two-step process and print the final result.

In [None]:
failure_log = result['messages']
if failure_log:
    # 1. Get the responsible component
    responsible_component = find_responsible_component(user_query, failure_log)

    # 2. Get the decisive error step
    decisive_error = find_decisive_error_step(user_query, failure_log, responsible_component)

    # 3. Print the final result
    print("\n" + "="*30)
    print("--- üèÅ Failure Attribution Complete ---")
    print(f"Responsible Component: {responsible_component}")
    
    if decisive_error:
        print(f"Decisive Error Step Index: {decisive_error['step_index']}")
        print("\n--- Error Message ---")
        print(decisive_error['message'])
        print("\n--- Judge's Reason ---")
        print(decisive_error['reason'])
    else:
        print("\nStep-by-step judge could not pinpoint a decisive error.")
    print("="*30)
else:
    print("Cannot run analysis: Failure log is empty (agent may have crashed).")

--- 1. Running All-at-Once Analysis ---
Responsible component identified: Orchestrator

The agent failed to properly interpret and react to the room rules showing max_capacity: -39 (an invalid value) and proceeded to book the event anyway. A proper orchestration would have recognized this anomaly and rejected the booking. Instead, the agents reasoning steps ignored this critical information and proceeded with the booking and task assignment.
--- 2. Running Step-by-Step Analysis on 'Orchestrator

The agent failed to properly interpret and react to the room rules showing max_capacity: -39 (an invalid value) and proceeded to book the event anyway. A proper orchestration would have recognized this anomaly and rejected the booking. Instead, the agents reasoning steps ignored this critical information and proceeded with the booking and task assignment.' ---
No decisive error step found by the judge.

--- üèÅ Failure Attribution Complete ---
Responsible Component: Orchestrator

The agent fai