# Cortex Agent Evaluation with TruLens

This notebook evaluates the `business_insights_agent` using Snowflake AI Observability (TruLens).

**Why a notebook instead of a `.py` file:**
- Keeps the Python session alive while traces are being ingested
- Avoids the session terminating before ingestion has finished
- Gives better visibility into each step

## Setup Environment

In [None]:
import os
import time
import datetime

# CRITICAL: the flag must already be in the environment when TruLens is imported.
os.environ["TRULENS_OTEL_TRACING"] = "1"
_otel_flag = os.environ["TRULENS_OTEL_TRACING"]  # read back to show what was actually stored

print("✓ Environment configured")
print(f"  TRULENS_OTEL_TRACING={_otel_flag}")

## Import Libraries

In [None]:
from snowflake.snowpark.context import get_active_session
from trulens.apps.custom import TruApp
from trulens.connectors.snowflake import SnowflakeConnector
from trulens.core.otel.instrument import instrument
from trulens.otel.semconv.trace import SpanAttributes
from trulens.core.app import RunConfig

print("✓ Libraries imported")

## Define Agent Wrapper

In [None]:
class SimpleCortexAgent:
    """Thin wrapper that invokes a Cortex Agent through a single SQL statement.

    The instance only stores what it is given: ``session`` is the object whose
    ``sql(...)`` method runs the statement, and ``agent_name`` is the agent
    identifier passed as the first argument of
    ``SNOWFLAKE.CORTEX.SEND_MESSAGE``.
    """

    def __init__(self, session, agent_name: str):
        self.session = session
        self.agent_name = agent_name

    # The span attributes tie the record-root input to the ``query`` argument
    # and the record-root output to the method's return value.
    @instrument(
        attributes={
            SpanAttributes.RECORD_ROOT.INPUT: "query",
            SpanAttributes.RECORD_ROOT.OUTPUT: "return",
        }
    )
    def answer_query(self, query: str) -> str:
        """Send *query* to the configured agent and return its reply.

        The agent name and the question are supplied as bind parameters
        rather than being spliced into the SQL text, so quotes, backslashes
        or any other characters in either value cannot change the statement
        that is executed.

        Args:
            query: The user question to forward to the agent.

        Returns:
            The ``RESPONSE`` column of the first result row, or the literal
            string ``"No response"`` when the statement yields no rows.
        """
        sql = "SELECT SNOWFLAKE.CORTEX.SEND_MESSAGE(?, ?) AS response"
        rows = self.session.sql(sql, params=[self.agent_name, query]).collect()

        # An empty result list is falsy, so no separate length check is needed.
        if rows:
            return rows[0]["RESPONSE"]
        return "No response"


print("✓ Agent wrapper defined")

## Connect to Snowflake

In [None]:
# Reuse the session supplied by the notebook context.
session = get_active_session()

# Switch the session to the database and schema used by the rest of the notebook.
for _context_stmt in ("USE DATABASE SNOWFLAKE_INTELLIGENCE", "USE SCHEMA AGENTS"):
    session.sql(_context_stmt).collect()

# Read the context back from the session so the printout shows what is in effect.
current_role = session.get_current_role()
current_db = session.get_current_database()
current_schema = session.get_current_schema()

print("✓ Connected to Snowflake")
for _label, _value in (
    ("Role", current_role),
    ("Database", current_db),
    ("Schema", current_schema),
):
    print(f"  {_label}: {_value}")

## Create TruLens Connector

In [None]:
# Build the TruLens SnowflakeConnector on top of the active Snowpark session.
# The connector is passed to TruApp when the app is registered below.
connector = SnowflakeConnector(snowpark_session=session)
print("✓ SnowflakeConnector created")

## Create Agent Instance

In [None]:
# Instantiate the wrapper for the agent under evaluation. The name is given
# in three-part form (database.schema.agent), matching the database and
# schema selected when the session context was set.
app = SimpleCortexAgent(
    session=session,
    agent_name="snowflake_intelligence.agents.business_insights_agent"
)
print("✓ Agent wrapper created")

## Register with AI Observability

In [None]:
# Wrap the agent in a TruApp. `main_method` points at answer_query, the
# method decorated with @instrument; app_name / app_version label this
# registration, and the connector ties it to the Snowflake session.
# NOTE(review): app_name is the same string used as the APPLICATION_NAME
# filter in the AI_OBSERVABILITY_EVENTS queries later in this notebook —
# presumably they must stay in sync; confirm before renaming either.
tru_app = TruApp(
    app=app,
    main_method=app.answer_query,
    app_name="business_insights_agent",
    app_version="v1.0_notebook",
    connector=connector
)
print("✓ App registered with AI Observability")

## Create Evaluation Dataset

In [None]:
# Statement 1: (re)create the single-column table that holds the evaluation queries.
_create_table_sql = """
    CREATE OR REPLACE TABLE SNOWFLAKE_INTELLIGENCE.AGENTS.EVAL_NOTEBOOK_QUERIES (
        USER_QUERY VARCHAR(500)
    )
"""

# Statement 2: load the three test questions.
_insert_rows_sql = """
    INSERT INTO SNOWFLAKE_INTELLIGENCE.AGENTS.EVAL_NOTEBOOK_QUERIES (USER_QUERY)
    VALUES 
        ('What is the total revenue?'),
        ('How many orders were placed?'),
        ('What is the average order value?')
"""

# Statement 3: row count used to confirm the load.
_count_rows_sql = """
    SELECT COUNT(*) as cnt 
    FROM SNOWFLAKE_INTELLIGENCE.AGENTS.EVAL_NOTEBOOK_QUERIES
"""

# Run the setup statements in order: the table must exist before the insert.
for _setup_sql in (_create_table_sql, _insert_rows_sql):
    session.sql(_setup_sql).collect()

# Verify
result = session.sql(_count_rows_sql).collect()
_loaded_rows = result[0]["CNT"]

print(f"✓ Dataset created with {_loaded_rows} queries")

## Create Evaluation Run

In [None]:
# Second-resolution timestamp, embedded in the run name so that repeated
# executions of this cell produce different names.
timestamp = f"{datetime.datetime.now():%Y%m%d_%H%M%S}"

# Collect the run settings first, then hand them to RunConfig in one go.
_run_settings = {
    "run_name": f"notebook_eval_{timestamp}",
    "description": "Evaluation from Snowflake Notebook - session persists",
    "label": "notebook",
    "source_type": "TABLE",
    "dataset_name": "SNOWFLAKE_INTELLIGENCE.AGENTS.EVAL_NOTEBOOK_QUERIES",
    # Pairs the RECORD_ROOT.INPUT key with the table's USER_QUERY column.
    "dataset_spec": {"RECORD_ROOT.INPUT": "USER_QUERY"},
    "llm_judge_name": "llama3.1-70b",
}
run_config = RunConfig(**_run_settings)

run = tru_app.add_run(run_config=run_config)
print(f"✓ Run created: {run_config.run_name}")

## Start Evaluation Run

In [None]:
def _seconds_since(started_at: float) -> float:
    """Return the wall-clock seconds elapsed since *started_at*."""
    return time.time() - started_at


print("Starting run (invoking agent for each query)...")
start_time = time.time()

try:
    run.start()
    elapsed = _seconds_since(start_time)
    print(f"✓ run.start() completed ({elapsed:.1f}s)")
except Exception as exc:
    # Report how long the attempt took and why it failed, then let the
    # exception propagate so the failure is not hidden.
    elapsed = _seconds_since(start_time)
    print(f"✗ run.start() failed ({elapsed:.1f}s)")
    print(f"Error: {exc!s}")
    raise

## Check Status After Start

In [None]:
# Pause briefly before asking for the run status.
# NOTE(review): this is a fixed 3-second wait, not a completion check, so
# the status read below may still describe an in-progress run.
print("Waiting for ingestion (3 seconds)...")
time.sleep(3)

# `status` is reused by the metrics and summary cells further down.
status = run.get_status()
print(f"\nRun Status: {status}")
print(f"Status Value: {status.value}")

## Check Stage for Uploaded Files

In [None]:
# Best-effort look at the trulens_spans stage: any failure in this cell is
# printed (truncated to 200 characters) instead of being raised.
try:
    _list_stage_sql = """
        LIST @SNOWFLAKE_INTELLIGENCE.AGENTS.trulens_spans
    """
    stage_files = session.sql(_list_stage_sql).collect()

    print(f"✓ Stage exists with {len(stage_files)} file(s)")
    if not stage_files:
        print("  ⚠ No files found - this indicates the upload bug")
    # Show at most the first five entries; the loop is a no-op for an empty listing.
    for _staged in stage_files[:5]:
        print(f"  - {_staged['name']}")
except Exception as exc:
    print(f"✗ Stage check failed: {str(exc)[:200]}")

## Check AI Observability Events

In [None]:
# Count the events stored under this application's name.
_event_count_sql = """
    SELECT COUNT(*) as event_count
    FROM SNOWFLAKE.LOCAL.AI_OBSERVABILITY_EVENTS
    WHERE APPLICATION_NAME = 'business_insights_agent'
"""
events = session.sql(_event_count_sql).collect()

event_count = events[0]["EVENT_COUNT"]
print(f"\nAI Observability Events: {event_count}")

# A single message is chosen from the count: any positive value means traces exist.
_trace_message = (
    "✓ Traces recorded successfully!"
    if event_count > 0
    else "⚠ No traces found - upload or ingestion issue"
)
print(_trace_message)

## Compute Metrics (if invocation completed)

In [None]:
# Status values for which this cell goes on to request metrics.
_metrics_ready_states = ("INVOCATION_COMPLETED", "INVOCATION_PARTIALLY_COMPLETED")

if status.value not in _metrics_ready_states:
    # Nothing to score yet: report the status seen earlier and stop here.
    print(f"⚠ Status is {status}, cannot compute metrics")
    print("Check dataset and instrumentation")
else:
    print("Starting metrics computation...")
    _requested_metrics = ["coherence", "answer_relevance"]
    run.compute_metrics(metrics=_requested_metrics)
    print("✓ Metrics computation triggered")

    # Give the computation a fixed 10 seconds, then read the status once more.
    print("\nWaiting for metrics computation (10 seconds)...")
    time.sleep(10)

    final_status = run.get_status()
    print(f"Final Status: {final_status}")

## Summary

In [None]:
# Banner: a 70-character rule above and below the title.
_rule = "=" * 70
print(_rule)
print("EVALUATION SUMMARY")
print(_rule)

# Recap of the run name, the status read after start, and the event count,
# followed by pointers for inspecting the results.
_summary_text = f"""Run Name: {run_config.run_name}
Status: {status}
Events Recorded: {event_count}

Check Snowsight:
  AI & ML > Evaluations > business_insights_agent > {run_config.run_name}
  
Or query directly:
  SELECT * FROM SNOWFLAKE.LOCAL.AI_OBSERVABILITY_EVENTS 
  WHERE APPLICATION_NAME = 'business_insights_agent'
  ORDER BY TIMESTAMP DESC;
"""
print(_summary_text)