In [1]:
import os
from dotenv import load_dotenv

load_dotenv()

# Configure API keys - replace with your actual keys
os.environ['OPENAI_API_KEY'] = os.getenv('OPENAI_API_KEY')
os.environ['LANGCHAIN_API_KEY'] = os.getenv('LANGCHAIN_API_KEY')
os.environ['LANGCHAIN_TRACING_V2'] = 'true'  # This triggers observability
os.environ['LANGCHAIN_PROJECT'] = 'langsmith-tutorial-demo' #This is the project name where the traces will be stored

# Verify configuration
required_vars = ['OPENAI_API_KEY', 'LANGCHAIN_API_KEY']
for var in required_vars:
    if not os.getenv(var) or 'your_' in os.getenv(var, ''):
        print(f"Warning: {var} needs your actual key")
    else:
        print(f"✓ {var} configured")

print(f"\nLangSmith Project: {os.getenv('LANGCHAIN_PROJECT')}")
print("\nTracing is now active - all AI operations will be logged for analysis")
print("Visit https://smith.langchain.com to see your traces")

✓ OPENAI_API_KEY configured
✓ LANGCHAIN_API_KEY configured

LangSmith Project: langsmith-tutorial-demo

Tracing is now active - all AI operations will be logged for analysis
Visit https://smith.langchain.com to see your traces


In [2]:
from typing import TypedDict, List
from langchain_core.messages import HumanMessage, AIMessage, SystemMessage
from langchain_openai import ChatOpenAI
from langgraph.graph import StateGraph, END
from langchain_core.tools import tool
import requests
import time
import json

# Initialize the language model with deterministic settings
# Using temperature=0 ensures consistent responses, making it easier to analyze patterns
llm = ChatOpenAI(model="gpt-4o-mini", temperature=0)

print("Language model initialized with temperature=0 for consistent behavior")
print("All LLM calls will be automatically traced in LangSmith")

Language model initialized with temperature=0 for consistent behavior
All LLM calls will be automatically traced in LangSmith


In [3]:
class AgentState(TypedDict):
    """Simple state that flows through our agent workflow."""
    user_question: str        # The original question from the user
    needs_search: bool        # Whether we determined search is needed
    search_result: str        # Result from our search tool (if used)
    final_answer: str         # The response we'll give to the user
    reasoning: str            # Why we made our decisions (great for observability)

print("Agent state schema defined")
print("This structured state enables LangSmith to track information flow")

Agent state schema defined
This structured state enables LangSmith to track information flow


In [4]:
@tool
def wikipedia_search(query: str) -> str:
    """Search Wikipedia for current information about a topic."""
    try:
        # Use Wikipedia's proper search API that can handle general queries
        # This is different from the page summary API which requires exact page titles
        search_url = "https://en.wikipedia.org/w/api.php"
        search_params = {
            "action": "query",
            "list": "search",
            "srsearch": query,
            "format": "json",
            "srlimit": 3  # Get top 3 results
        }
        
        response = requests.get(search_url, params=search_params, timeout=10)
        
        if response.status_code == 200:
            data = response.json()
            search_results = data.get('query', {}).get('search', [])
            
            if search_results:
                # Get the most relevant result and fetch its summary
                top_result = search_results[0]
                page_title = top_result['title']
                
                # Now get the page summary using the exact title
                summary_url = f"https://en.wikipedia.org/api/rest_v1/page/summary/{page_title.replace(' ', '_')}"
                summary_response = requests.get(summary_url, timeout=10)
                
                if summary_response.status_code == 200:
                    summary_data = summary_response.json()
                    extract = summary_data.get('extract', 'No summary available')
                    # Truncate for readability in traces
                    return f"Found information about '{page_title}': {extract[:400]}..."
                else:
                    return f"Found '{page_title}' but couldn't retrieve summary"
            else:
                return f"No Wikipedia articles found for '{query}'"
        else:
            return f"Wikipedia search failed with status {response.status_code}"
            
    except Exception as e:
        # This error handling will show up in LangSmith traces
        return f"Search error: {str(e)}"

print("Search tool created with proper Wikipedia search API integration")
print("Tool execution timing and results will be captured automatically")

Search tool created with proper Wikipedia search API integration
Tool execution timing and results will be captured automatically


In [5]:
def decide_search_need(state: AgentState) -> AgentState:
    """Analyze the question and decide if we need to search for current information."""
    user_question = state["user_question"]
    
    # This prompt engineering is visible in LangSmith traces
    # Notice how we're asking for a structured response to make parsing reliable
    decision_prompt = f"""
    Analyze this question and decide if it requires current/recent information that might not be in your training data:
    
    Question: "{user_question}"
    
    Consider:
    - Does this ask about recent events, current prices, or breaking news?
    - Does this ask about people, companies, or topics that change frequently?
    - Can you answer this well using your existing knowledge?
    
    Respond with exactly "SEARCH" if you need current information, or "DIRECT" if you can answer directly.
    Then on a new line, briefly explain your reasoning.
    """
    
    response = llm.invoke([SystemMessage(content=decision_prompt)])
    decision_text = response.content.strip()
    
    # Parse the response - this parsing logic will be visible in traces
    lines = decision_text.split('\n')
    decision = lines[0].strip()
    reasoning = lines[1] if len(lines) > 1 else "No reasoning provided"
    
    # Update state with our decision
    state["needs_search"] = decision == "SEARCH"
    state["reasoning"] = f"Decision: {decision}. Reasoning: {reasoning}"
    
    # This print statement will help you see the flow during execution
    print(f"Decision: {'SEARCH' if state['needs_search'] else 'DIRECT'} - {reasoning}")
    
    return state

In [6]:
def execute_search(state: AgentState) -> AgentState:
    """Execute search if needed, otherwise skip this step."""
    if not state["needs_search"]:
        print("Skipping search - not needed for this question")
        state["search_result"] = "No search performed"
        return state
    
    print(f"Executing search for: {state['user_question']}")
    
    # Execute our search tool - this will show up as a separate step in LangSmith
    # The .invoke() call will be traced with full input/output details
    search_result = wikipedia_search.invoke({"query": state["user_question"]})
    state["search_result"] = search_result
    
    print(f"Search completed: {len(search_result)} characters returned")
    
    return state

In [7]:
def generate_response(state: AgentState) -> AgentState:
    """Generate the final response using all available information."""
    user_question = state["user_question"]
    search_result = state.get("search_result", "")
    used_search = state["needs_search"]
    
    # Build context for the response
    # This conditional logic creates different prompt patterns that LangSmith will capture
    if used_search and "Search error" not in search_result:
        context = f"Question: {user_question}\n\nSearch Results: {search_result}"
        response_prompt = f"""
        Answer the user's question using both your knowledge and the search results provided.
        
        {context}
        
        Provide a helpful, accurate response that synthesizes the information.
        """
    else:
        response_prompt = f"""
        Answer this question using your existing knowledge:
        
        {user_question}
        
        Provide a helpful, accurate response.
        """
    
    # This LLM call will be traced with the complete prompt and response
    response = llm.invoke([SystemMessage(content=response_prompt)])
    state["final_answer"] = response.content
    
    print(f"Response generated: {len(response.content)} characters")
    
    return state

In [8]:
# Build the workflow graph
workflow = StateGraph(AgentState)

# Add our three workflow steps
# Each of these will appear as a distinct node in LangSmith's graph visualization
workflow.add_node("decide", decide_search_need)
workflow.add_node("search", execute_search)
workflow.add_node("respond", generate_response)

# Define the flow with conditional logic
# LangSmith will show you which edges were traversed for each execution
workflow.set_entry_point("decide")
workflow.add_edge("decide", "search")     # Always go to search step (it will skip if not needed)
workflow.add_edge("search", "respond")    # Then generate response
workflow.add_edge("respond", END)         # Finish

# Compile into an executable agent
simple_agent = workflow.compile()

print("Workflow compiled successfully")
print("Flow: decide → search → generate_response")
print("Ready to demonstrate LangSmith observability")

Workflow compiled successfully
Flow: decide → search → generate_response
Ready to demonstrate LangSmith observability


In [9]:
def run_test_with_observability(question: str, test_type: str) -> dict:
    """Run a test and capture comprehensive observability data."""
    print(f"\n{'='*60}")
    print(f"Testing: {question}")
    print(f"Type: {test_type}")
    print(f"{'='*60}")
    
    start_time = time.time()
    
    # Initialize state for this test
    initial_state = {
        "user_question": question,
        "needs_search": False,
        "search_result": "",
        "final_answer": "",
        "reasoning": ""
    }
    
    try:
        # Execute with metadata for LangSmith
        # This metadata will help you filter and analyze traces later
        config = {
            "metadata": {
                "test_type": test_type,
                "tutorial": "langsmith-observability"
            },
            "tags": ["tutorial", "demo", test_type]
        }
        
        # This invoke call will create a complete trace in LangSmith
        final_state = simple_agent.invoke(initial_state, config=config)
        
        end_time = time.time()
        total_time = end_time - start_time
        
        # Display results for immediate feedback
        print(f"\nResults:")
        print(f"   Decision Process: {final_state['reasoning']}")
        print(f"   Used Search: {'Yes' if final_state['needs_search'] else 'No'}")
        print(f"   Response Length: {len(final_state['final_answer'])} characters")
        print(f"   Total Time: {total_time:.2f} seconds")
        print(f"\nAnswer: {final_state['final_answer'][:200]}...")
        
        return {
            "question": question,
            "type": test_type,
            "success": True,
            "used_search": final_state['needs_search'],
            "total_time": round(total_time, 2),
            "reasoning": final_state['reasoning']
        }
        
    except Exception as e:
        print(f"Error: {str(e)}")
        return {
            "question": question,
            "type": test_type,
            "success": False,
            "error": str(e)
        }

In [10]:
# Define our test cases
test_cases = [
    {
        "question": "What is the capital of France?",
        "type": "direct_answer",
        "expected_search": False
    },
    {
        "question": "What happened in the 2024 US presidential election?",
        "type": "current_info",
        "expected_search": True
    },
    {
        "question": "Tell me about artificial intelligence",
        "type": "factual_lookup",
        "expected_search": False  # Should be answerable directly
    }
]

print("Starting LangSmith Observability Demo")
print("Each test will generate detailed traces in your LangSmith dashboard")
print("Visit https://smith.langchain.com to see real-time traces\n")

test_results = []

for i, test_case in enumerate(test_cases, 1):
    print(f"\nRunning Test {i}/{len(test_cases)}")
    
    result = run_test_with_observability(
        test_case["question"], 
        test_case["type"]
    )
    
    test_results.append(result)
    
    # Small delay to make the traces easier to distinguish in LangSmith
    time.sleep(1)

print(f"\n\nAll tests completed")
print(f"Generated {len(test_results)} traces in LangSmith")
print(f"Check your dashboard to explore the detailed execution data")

Starting LangSmith Observability Demo
Each test will generate detailed traces in your LangSmith dashboard
Visit https://smith.langchain.com to see real-time traces


Running Test 1/3

Testing: What is the capital of France?
Type: direct_answer
Decision: DIRECT - The capital of France is Paris, which is a well-established fact that does not change over time.
Skipping search - not needed for this question
Response generated: 31 characters

Results:
   Decision Process: Decision: DIRECT. Reasoning: The capital of France is Paris, which is a well-established fact that does not change over time.
   Used Search: No
   Response Length: 31 characters
   Total Time: 2.58 seconds

Answer: The capital of France is Paris....

Running Test 2/3

Testing: What happened in the 2024 US presidential election?
Type: current_info
Decision: SEARCH - 
Executing search for: What happened in the 2024 US presidential election?
Search completed: 448 characters returned
Response generated: 518 characters

Result