In [2]:
import os
import base64
from dotenv import load_dotenv

load_dotenv(override=True)

# Get keys for your project from the project settings page: https://cloud.langfuse.com
# The Langfuse and OpenAI credentials must come from the environment (or from the
# .env file loaded above). Check them up front: writing a missing value into
# os.environ would otherwise fail with an opaque "TypeError: str expected, not NoneType".
_missing_vars = [
    var_name
    for var_name in ("LANGFUSE_PUBLIC_KEY", "LANGFUSE_SECRET_KEY", "OPENAI_API_KEY")
    if not os.getenv(var_name)
]
if _missing_vars:
    raise RuntimeError(
        "Missing required environment variable(s): "
        + ", ".join(_missing_vars)
        + ". Set them in your shell or in a .env file."
    )

# Fall back to Langfuse Cloud when no host is configured (unset or empty).
if not os.getenv("LANGFUSE_HOST"):
    os.environ["LANGFUSE_HOST"] = "https://cloud.langfuse.com"

# Build Basic Auth header.
LANGFUSE_AUTH = base64.b64encode(
    f"{os.environ['LANGFUSE_PUBLIC_KEY']}:{os.environ['LANGFUSE_SECRET_KEY']}".encode()
).decode()

# Configure OpenTelemetry endpoint & headers
# Trailing slashes are stripped so a host such as "https://host/" does not
# produce a "//api/public/otel" path.
os.environ["OTEL_EXPORTER_OTLP_ENDPOINT"] = os.environ["LANGFUSE_HOST"].rstrip("/") + "/api/public/otel"
os.environ["OTEL_EXPORTER_OTLP_HEADERS"] = f"Authorization=Basic {LANGFUSE_AUTH}"

In [3]:
from langfuse import get_client
 
# Create the Langfuse client used by the rest of the notebook.
langfuse = get_client()

# Report whether the configured credentials and host are accepted.
print(
    "Langfuse client is authenticated and ready!"
    if langfuse.auth_check()
    else "Authentication failed. Please check your credentials and host."
)
    

Langfuse client is authenticated and ready!


In [4]:
# Apply the nest_asyncio patch before any agent code runs.
# NOTE(review): presumably needed because the notebook kernel already has an
# event loop running (a later cell uses asyncio.get_running_loop()) - confirm.
import nest_asyncio
nest_asyncio.apply()


In [5]:
import logfire

# Set up logfire for this service; send_to_logfire=False disables sending
# data to Logfire itself.
# NOTE(review): the recorded output of this cell shows "Overriding of current
# TracerProvider is not allowed" - presumably a tracer provider was already
# registered earlier; confirm traces still arrive in Langfuse.
logfire.configure(send_to_logfire=False, service_name="my_agent_service")

# Automatically patch the OpenAI Agents SDK so its logs are sent via OTLP to Langfuse.
logfire.instrument_openai_agents()

Overriding of current TracerProvider is not allowed


In [6]:
import asyncio
from agents import Agent, Runner

async def main():
    """Run one prompt through an "Assistant" agent and print the final output."""
    assistant = Agent(
        instructions="You are a senior software engineer",
        name="Assistant",
    )
    prompt = "Tell me why it is important to evaluate AI agents."

    run_result = await Runner.run(assistant, prompt)
    print(run_result.final_output)

# Reuse the event loop that is already running in the kernel, schedule main()
# on it as a task, and wait for that task to finish (top-level await).
loop = asyncio.get_running_loop()
await loop.create_task(main())

# Flush the Langfuse client once the run is done.
# NOTE(review): presumably this pushes any buffered trace data before the cell ends - confirm.
langfuse.flush()

It is important to evaluate AI agents for several key reasons:

1. **Reliability:**  
Evaluation ensures that AI agents behave as intended and consistently produce reliable and accurate results. This is crucial in applications where incorrect decisions could have significant consequences.

2. **Safety:**  
AI agents may cause harm if they take inappropriate actions, especially in safety-critical domains such as healthcare, finance, autonomous vehicles, or security systems. Evaluating AI agents helps identify and mitigate potential risks.

3. **Fairness and Bias:**  
AI systems can inadvertently learn and reinforce biases present in their training data. Continuous evaluation helps identify and correct unfair or biased behavior, supporting ethical and equitable use.

4. **Performance Measurement:**  
Evaluation allows organizations to measure the effectiveness, efficiency, and scalability of AI agents. This is vital for optimizing models, ensuring satisfactory user experiences, and meeti