In [11]:
import os
from grounded_ai import Evaluator

In [None]:
# Verify API key
# The key must be supplied through the environment (for example
# `export ANTHROPIC_API_KEY=...` before launching Jupyter, or an interactive
# `getpass.getpass()` prompt). Never assign a literal key in this cell: doing
# so would overwrite a valid key already present in the environment and risks
# committing a secret together with the notebook.
if not os.getenv("ANTHROPIC_API_KEY"):
    print("Error: ANTHROPIC_API_KEY environment variable not set.")

In [18]:
# Create the evaluator bound to one specific Anthropic model.
# The model string carries the 'anthropic/' prefix, which is what triggers
# auto-detection.
model_id = "anthropic/claude-haiku-4-5-20251001"
evaluator = Evaluator(model=model_id)

# Passage handed to the evaluator as `context`. Written as adjacent string
# literals so the leading and trailing newlines are explicit.
context = (
    "\n"
    "The Apollo 11 mission landed the first humans on the Moon.\n"
    "Neil Armstrong and Buzz Aldrin walked on the lunar surface.\n"
    "Michael Collins remained in orbit in the Command Module.\n"
)

# Test Case 1 (accurate): the response restates a fact found in the context.
query = "Who stayed in orbit?"
response_accurate = "Michael Collins remained in orbit."

In [24]:
from pprint import pprint
import json

# Run Test Case 1 through the evaluator and pretty-print the structured result.
result = evaluator.evaluate(query=query, context=context, response=response_accurate)

# model_dump(mode="json") returns JSON-compatible Python values directly, so
# there is no need to serialize to a string and parse it back with json.loads.
pprint(result.model_dump(mode="json"))

{'confidence': 0.98,
 'label': 'faithful',
 'reasoning': 'The content accurately states that Michael Collins remained in '
              "orbit, which directly answers the query 'Who stayed in orbit?' "
              'and is fully supported by the context provided. The statement '
              'is a direct extraction from the given context with no '
              'hallucinations or inaccuracies.',
 'score': 0.95}


In [26]:
# Test Case 2: Inaccurate
# The response contradicts the context (which says Michael Collins remained
# in orbit), while reusing the same `query` and `context` as Test Case 1.
response_inaccurate = "Buzz Aldrin stayed in the orbiter while Neil went down alone."

result = evaluator.evaluate(query=query, context=context, response=response_inaccurate)

# model_dump(mode="json") returns JSON-compatible Python values directly, so
# there is no need to serialize to a string and parse it back with json.loads.
pprint(result.model_dump(mode="json"))

{'confidence': 0.95,
 'label': 'hallucination',
 'reasoning': 'The content contradicts the provided context. According to the '
              'context, Michael Collins remained in orbit in the Command '
              'Module, while Neil Armstrong and Buzz Aldrin walked on the '
              'lunar surface. The evaluated content incorrectly states that '
              'Buzz Aldrin stayed in the orbiter while Neil went down alone. '
              'Buzz Aldrin actually descended to the lunar surface with Neil '
              'Armstrong. Only Michael Collins stayed in orbit.',
 'score': 0.1}


In [29]:
from pydantic import BaseModel, Field
from typing import List

# Test Case 3: Code Review (Custom Schema Override)


# 1. Define Custom I/O Schemas
# Custom input schema for the code-review evaluation: the snippet to review
# plus the name of the language it is written in.
# (Documented with `#` comments rather than a docstring so the pydantic model
# itself is left exactly as defined.)
class CodeReviewInput(BaseModel):
    code: str  # source text of the snippet under review
    language: str  # language name as a plain string, e.g. "python"


# Custom output schema for the code-review evaluation.
# Each field carries a description so that the meaning of every flag is
# spelled out in the schema passed as `output_schema`, instead of being left
# to the field name alone. In particular, `is_bug_free` and `security_risk`
# are documented as independent axes, so a snippet can be functionally
# correct yet still flagged as a security risk.
# NOTE(review): confirm this split matches the intended meaning of
# `is_bug_free`; tighten the description if security flaws should count as bugs.
class CodeReviewOutput(BaseModel):
    is_bug_free: bool = Field(
        description=(
            "True if the code has no functional or logic bugs; "
            "security issues are reported separately in security_risk"
        )
    )
    security_risk: bool = Field(
        description="True if the code contains a security vulnerability"
    )
    suggestions: List[str] = Field(
        description="Concrete, actionable improvements to the code"
    )
    complexity_score: int = Field(description="1-10 complexity rating")


# 2. Define Input Data
# DANGEROUS: SQL Injection vulnerability
# The snippet is intentionally vulnerable: it interpolates `user_input`
# straight into the SQL text with an f-string. It is held only as a string to
# be reviewed; this cell does not execute it.
code_sample = """
def execute_query(user_input):
    query = f"SELECT * FROM users WHERE name = '{user_input}'"
    db.execute(query)
"""
# Wrap the snippet in the custom input schema, labelling it as Python.
review_input = CodeReviewInput(code=code_sample, language="python")

# 3. Evaluate with Override
# Here `input_data` and `output_schema` carry the custom schemas, in place of
# the query/context/response arguments used by the earlier test cases.
review_result = evaluator.evaluate(
    input_data=review_input, output_schema=CodeReviewOutput
)

# model_dump(mode="json") returns JSON-compatible Python values directly, so
# there is no need to serialize to a string and parse it back with json.loads.
pprint(review_result.model_dump(mode="json"))

{'complexity_score': 2,
 'is_bug_free': True,
 'security_risk': True,
 'suggestions': ['Replace string interpolation with parameterized queries to '
                 'prevent SQL injection attacks',
                 'Use prepared statements with placeholders (e.g., '
                 "db.execute('SELECT * FROM users WHERE name = ?', "
                 '(user_input,)))',
                 'Never concatenate user input directly into SQL queries',
                 'Consider using an ORM like SQLAlchemy to abstract database '
                 'queries and provide built-in protection against SQL '
                 'injection',
                 'Add input validation and sanitization as a defense-in-depth '
                 'measure']}
