In [2]:
from openai import OpenAI

# Shared client used by every API call in this notebook. No key is passed
# here, so credentials presumably come from the environment
# (e.g. OPENAI_API_KEY) — TODO confirm.
openai_client = OpenAI()

In [3]:
from pydantic import BaseModel

# Target shape for the structured-output demo below. `#` comments are used
# instead of a class docstring on purpose: a docstring on a pydantic model is
# emitted as `description` in model_json_schema() (see the later RAGResponse
# cells), which would change the schema shown in the next cell.
class CalendarEvent(BaseModel):
    name: str  # event title ('Science Fair' in the recorded run)
    date: str  # plain string, not a date type; the recorded run returns 'Friday'
    participants: list[str]  # one entry per person mentioned

In [7]:
CalendarEvent.model_json_schema()

{'properties': {'name': {'title': 'Name', 'type': 'string'},
  'date': {'title': 'Date', 'type': 'string'},
  'participants': {'items': {'type': 'string'},
   'title': 'Participants',
   'type': 'array'}},
 'required': ['name', 'date', 'participants'],
 'title': 'CalendarEvent',
 'type': 'object'}

In [8]:
# Structured-output request: passing `text_format=CalendarEvent` makes the
# reply available as a CalendarEvent instance (inspected via `.parsed` and
# `.output_parsed` in the cells that follow).
response = openai_client.responses.parse(
    model="gpt-4o-mini",
    input=[
        {"role": "system", "content": "Extract the event information."},
        {
            "role": "user",
            "content": "Alice and Bob are going to a science fair on Friday.",
        },
    ],
    text_format=CalendarEvent,
)

In [15]:
response.output[0].content[0].parsed

CalendarEvent(name='Science Fair', date='Friday', participants=['Alice', 'Bob'])

In [17]:
response.output[0].content[0].text

'{"name":"Science Fair","date":"Friday","participants":["Alice","Bob"]}'

In [18]:
event = response.output_parsed

In [19]:
event

CalendarEvent(name='Science Fair', date='Friday', participants=['Alice', 'Bob'])

In [33]:
# Same request as the earlier structured call; this time the raw reply is
# printed. `output_text` is the JSON string, not the parsed CalendarEvent.
response = openai_client.responses.parse(
    model="gpt-4o-mini",
    input=[
        {"role": "system", "content": "Extract the event information."},
        {
            "role": "user",
            "content": "Alice and Bob are going to a science fair on Friday.",
        },
    ],
    text_format=CalendarEvent
)
print(response.output_text)

{"name":"Science Fair","date":"Friday","participants":["Alice","Bob"]}


In [22]:
# Baseline for comparison: identical messages sent through `responses.create`
# with no `text_format`. The reply is free-form text, and the three recorded
# outputs below show its layout changing from run to run.
response = openai_client.responses.create(
    model="gpt-4o-mini",
    input=[
        {"role": "system", "content": "Extract the event information."},
        {
            "role": "user",
            "content": "Alice and Bob are going to a science fair on Friday.",
        },
    ],
)
print(response.output_text)

Event: Science Fair  
Participants: Alice and Bob  
Day: Friday


Event Information:
- Event: Science Fair
- Participants: Alice and Bob
- Day: Friday


Event: Science Fair  
Participants: Alice and Bob  
Day: Friday


## Structured RAG

In [23]:

from gitsource import GithubRepositoryDataReader, chunk_documents
from minsearch import Index

# Read the .md / .mdx files of the evidentlyai/docs GitHub repository.
reader = GithubRepositoryDataReader(
    repo_owner="evidentlyai",
    repo_name="docs",
    allowed_extensions={"md", "mdx"},
)
files = reader.read()

# Parse every file, then split the parsed documents into chunks.
# NOTE(review): size/step semantics belong to gitsource.chunk_documents —
# presumably 3000-character windows advancing by 1500 (i.e. overlapping);
# TODO confirm.
parsed_docs = [doc.parse() for doc in files]
chunked_docs = chunk_documents(parsed_docs, size=3000, step=1500)

# Build the search index over the chunks. Presumably `text_fields` are
# full-text searched and `keyword_fields` are exact-match filters — see the
# minsearch documentation to confirm.
index = Index(
    text_fields=["title", "description", "content"],
    keyword_fields=["filename"]
)
index.fit(chunked_docs)

print(f"Indexed {len(chunked_docs)} chunks from {len(files)} documents")

Indexed 385 chunks from 95 documents


In [24]:
def search(query, num_results=5):
    """Return the chunks in the module-level ``index`` that best match ``query``.

    Args:
        query: Free-text search string forwarded to ``index.search``.
        num_results: Maximum number of hits to request. Defaults to 5, the
            value that was previously hard-coded, so existing
            ``search(query)`` calls behave exactly as before.

    Returns:
        Whatever ``index.search`` returns for the query; ``build_prompt``
        JSON-serialises it as the prompt context.
    """
    return index.search(query=query, num_results=num_results)

In [26]:

import json

# System prompt for the RAG helpers. `rag` and `rag_structured` read this
# module-level name when they are called, so re-assigning it in a later cell
# changes their behaviour without redefining them.
instructions = """
You're a documentation assistant. Answer the QUESTION based on the CONTEXT from our documentation.

Use only facts from the CONTEXT when answering.
If the answer isn't in the CONTEXT, say so.
"""

# User-message template. `.strip()` removes the leading and trailing newlines
# that the triple-quoted literal would otherwise contribute.
prompt_template = """
<QUESTION>
{question}
</QUESTION>

<CONTEXT>
{context}
</CONTEXT>
""".strip()

def build_prompt(question, search_results):
    """Render the user message: the question plus the search hits as JSON.

    Args:
        question: The user's question, inserted verbatim into the template.
        search_results: Search hits to expose as context; serialised with
            ``json.dumps(..., indent=2)``.

    Returns:
        The filled-in ``prompt_template`` string.
    """
    # default=str keeps serialisation from raising TypeError when a hit carries
    # a value json cannot encode natively (e.g. a date in document metadata).
    # Output is unchanged for inputs that were already serialisable.
    context = json.dumps(search_results, indent=2, default=str)
    return prompt_template.format(question=question, context=context)

In [27]:
def llm(user_prompt, instructions=None, model="gpt-4o-mini"):
    """Send one user message, optionally preceded by a system message.

    Args:
        user_prompt: Content of the ``user`` message.
        instructions: Optional system prompt; when falsy, no system message
            is included.
        model: Model name forwarded to the API call.

    Returns:
        ``output_text`` of the object returned by
        ``openai_client.responses.create``.
    """
    user_message = {"role": "user", "content": user_prompt}

    if instructions:
        system_message = {"role": "system", "content": instructions}
        conversation = [system_message, user_message]
    else:
        conversation = [user_message]

    reply = openai_client.responses.create(model=model, input=conversation)
    return reply.output_text

In [29]:
def rag(query):
    """Answer ``query`` as free text: search, build the prompt, call ``llm``.

    The module-level ``instructions`` string is used as the system prompt and
    is looked up when this function runs, not when it is defined.
    """
    hits = search(query)
    return llm(build_prompt(query, hits), instructions)

In [31]:
answer = rag('how do I implement LLM as a Judge?')

In [46]:
def llm_structured(user_prompt, output_type, instructions=None, model="gpt-4o-mini"):
    """Like ``llm``, but ask for a reply shaped like ``output_type``.

    Args:
        user_prompt: Content of the ``user`` message.
        output_type: Class passed as ``text_format`` to
            ``openai_client.responses.parse`` (pydantic models in this notebook).
        instructions: Optional system prompt; when falsy, no system message
            is included.
        model: Model name forwarded to the API call.

    Returns:
        The ``output_parsed`` attribute of the response.
        NOTE(review): whether this can be None (e.g. when the model declines
        to answer) depends on the SDK — confirm before dereferencing.
    """
    user_message = {"role": "user", "content": user_prompt}

    if instructions:
        system_message = {"role": "system", "content": instructions}
        conversation = [system_message, user_message]
    else:
        conversation = [user_message]

    reply = openai_client.responses.parse(
        model=model,
        input=conversation,
        text_format=output_type,
    )
    return reply.output_parsed

In [47]:
response = llm_structured(
    instructions="Extract the event information.",
    user_prompt="Alice and Bob are going to a science fair on Friday.",
    output_type=CalendarEvent,
)

In [48]:
response

CalendarEvent(name='Science Fair', date='Friday', participants=['Alice', 'Bob'])

In [50]:
# First version of the RAG reply schema: both fields are plain required
# values (see the `required` list in the schema printed a few cells below).
class RAGResponse(BaseModel):
    answer: str  # answer text; in the recorded "not found" run it holds an explanatory sentence
    found_answer: bool  # False in the recorded run where the context lacked the answer

In [52]:
def rag_structured(query, output_type=None):
    """Answer ``query`` from the indexed docs and return a structured object.

    Args:
        query: The user's question; used both for search and in the prompt.
        output_type: Class to parse the reply into. When omitted (None), the
            ``RAGResponse`` class that is current at call time is used.

    Returns:
        The value returned by ``llm_structured`` for the built prompt.
    """
    if output_type is None:
        # Resolve the default when called, not when defined. A
        # `output_type=RAGResponse` default is evaluated once, when this cell
        # runs, so it would keep pointing at the old class object after
        # RAGResponse is redefined in a later cell.
        output_type = RAGResponse

    search_results = search(query)
    prompt = build_prompt(query, search_results)
    return llm_structured(
        instructions=instructions,
        user_prompt=prompt,
        output_type=output_type,
    )

In [56]:
answer = rag_structured('how do i do llm evals?')

print(answer.answer[:100])
print(answer.found_answer)

To perform LLM evaluations, you can follow these steps:

1. **Install the Required Package**: Instal
True


In [57]:
answer = rag_structured('how do I install kafka on windows?')

print(answer.answer[:100])
print(answer.found_answer)

The provided context does not contain any information about installing Kafka on Windows.
False


In [58]:
RAGResponse.model_json_schema()

{'properties': {'answer': {'title': 'Answer', 'type': 'string'},
  'found_answer': {'title': 'Found Answer', 'type': 'boolean'}},
 'required': ['answer', 'found_answer'],
 'title': 'RAGResponse',
 'type': 'object'}

In [64]:
from typing import Optional

# Second version: `answer` becomes nullable with a default, which removes it
# from `required` in the JSON schema printed in the next cell.
class RAGResponse(BaseModel):
    answer: Optional[str] = None  # None is now a valid "no answer" value
    found_answer: bool  # still required

In [65]:
RAGResponse.model_json_schema()

{'properties': {'answer': {'anyOf': [{'type': 'string'}, {'type': 'null'}],
   'default': None,
   'title': 'Answer'},
  'found_answer': {'title': 'Found Answer', 'type': 'boolean'}},
 'required': ['found_answer'],
 'title': 'RAGResponse',
 'type': 'object'}

In [68]:
answer = rag_structured('how do I install kafka on windows?', RAGResponse)

print(answer.answer)
print(answer.found_answer)

The provided context does not contain information on how to install Kafka on Windows.
False


In [69]:
# Exact repeat of the previous cell. The recorded outputs differ (a sentence
# there, None here), so with this schema alone the model does not fill
# `answer` consistently for the not-found case.
answer = rag_structured('how do I install kafka on windows?', RAGResponse)

print(answer.answer)
print(answer.found_answer)

None
False


In [70]:

# Prompt-based attempt: the same system prompt as before plus an explicit
# rule telling the model to set `answer` to None when nothing is found.
# Re-assigning this global is enough; the RAG helpers read it at call time.
instructions = """
You're a documentation assistant. Answer the QUESTION based on the CONTEXT from our documentation.

Use only facts from the CONTEXT when answering.
If the answer isn't in the CONTEXT, say so.

If you don't find the answer, set `answer` to None
"""

In [71]:
answer = rag_structured('how do I install kafka on windows?', RAGResponse)

print(answer.answer)
print(answer.found_answer)

None
False


In [73]:

# Restore the original system prompt (drops the "set `answer` to None" rule)
# so the following cells can express that rule through the schema instead.
instructions = """
You're a documentation assistant. Answer the QUESTION based on the CONTEXT from our documentation.

Use only facts from the CONTEXT when answering.
If the answer isn't in the CONTEXT, say so.
"""

In [76]:
# Third version: the "answer is None when not found" rule moves into the
# class docstring, which pydantic emits as the top-level `description` of the
# JSON schema (see the next cell). The docstring is therefore part of what
# the model sees; edit it with care.
class RAGResponse(BaseModel):
    """
    The response from the documentation RAG system

    If the answer to the question wasn't found in the database, `answer` is None
    """
    answer: Optional[str] = None  # nullable, not listed in `required`
    found_answer: bool  # required

In [77]:
RAGResponse.model_json_schema()

{'description': "The response from the documentation RAG system\n\nIf the answer to the question wasn't found in the database, `answer` is None",
 'properties': {'answer': {'anyOf': [{'type': 'string'}, {'type': 'null'}],
   'default': None,
   'title': 'Answer'},
  'found_answer': {'title': 'Found Answer', 'type': 'boolean'}},
 'required': ['found_answer'],
 'title': 'RAGResponse',
 'type': 'object'}

In [78]:
answer = rag_structured('how do I install kafka on windows?', RAGResponse)

print(answer.answer)
print(answer.found_answer)

None
False


In [81]:
from pydantic import Field

# Fourth version: guidance is attached per field via Field(description=...),
# which appears under each property in the schema printed in the next cell.
# The class docstring still supplies the top-level schema `description`.
class RAGResponse(BaseModel):
    """
    The response from the documentation RAG system
    """
    # Nullable with default None; the description tells the model when to use None.
    answer: Optional[str] = Field(None, description="Answer to the question or None if it's not found")
    # Required flag; no default is given.
    found_answer: bool = Field(description="True if the answer is found, False otherwise")

In [82]:
RAGResponse.model_json_schema()

{'description': 'The response from the documentation RAG system',
 'properties': {'answer': {'anyOf': [{'type': 'string'}, {'type': 'null'}],
   'default': None,
   'description': "Answer to the question or None if it's not found",
   'title': 'Answer'},
  'found_answer': {'description': 'True if the answer is found, False otherwise',
   'title': 'Found Answer',
   'type': 'boolean'}},
 'required': ['found_answer'],
 'title': 'RAGResponse',
 'type': 'object'}

In [83]:
answer = rag_structured('how do I install kafka on windows?', RAGResponse)

print(answer.answer)
print(answer.found_answer)

None
False


In [93]:
from typing import Literal

# Fifth version: a richer reply with metadata. Every field is required again
# (no defaults), so `answer` is a plain string even when nothing was found —
# in the recorded Kafka run it carries an explanatory sentence.
class RAGResponse(BaseModel):
    """
    This model provides a structured answer with metadata about the response,
    including confidence, categorization, and follow-up suggestions.
    """

    answer: str = Field(description="The main answer to the user's question in markdown")
    found_answer: bool = Field(description="True if relevant information was found in the documentation")
    # NOTE(review): the 0.0-1.0 range is stated only in the description; no
    # numeric constraint is declared on the field itself.
    confidence: float = Field(description="Confidence score from 0.0 to 1.0 indicating how certain the answer is")
    confidence_explanation: str = Field(description="Explanation about the confidence level")
    # Literal restricts the value to these five strings (rendered as `enum` in the schema).
    answer_type: Literal["how-to", "explanation", "troubleshooting", "comparison", "reference"] = Field(description="The category of the answer")
    followup_questions: list[str] = Field(description="Suggested follow-up questions the user might want to ask")

In [94]:
RAGResponse.model_json_schema()

{'description': 'This model provides a structured answer with metadata about the response,\nincluding confidence, categorization, and follow-up suggestions.',
 'properties': {'answer': {'description': "The main answer to the user's question in markdown",
   'title': 'Answer',
   'type': 'string'},
  'found_answer': {'description': 'True if relevant information was found in the documentation',
   'title': 'Found Answer',
   'type': 'boolean'},
  'confidence': {'description': 'Confidence score from 0.0 to 1.0 indicating how certain the answer is',
   'title': 'Confidence',
   'type': 'number'},
  'confidence_explanation': {'description': 'Explanation about the confidence level',
   'title': 'Confidence Explanation',
   'type': 'string'},
  'answer_type': {'description': 'The category of the answer',
   'enum': ['how-to',
    'explanation',
    'troubleshooting',
    'comparison',
    'reference'],
   'title': 'Answer Type',
   'type': 'string'},
  'followup_questions': {'description': 'S

In [95]:
answer = rag_structured('how do I evaluate llms', RAGResponse)


In [99]:
print(answer.answer[:100])
print(answer.confidence)
print(answer.confidence_explanation)
print(answer.answer_type)
print(answer.followup_questions)


### How to Evaluate LLMs

1. **Setup**: Start by installing the `evidently` package:
   ```bash
   p
0.95
The steps provided are directly derived from the context and outline a clear pathway to evaluating LLMs using multiple judges.
how-to
['What are the best practices for evaluating LLM outputs?', 'How do I troubleshoot issues with LLM evaluations?', 'Can I use custom metrics for LLM evaluation?']


In [103]:
answer = rag_structured('how do I install kafka on windows?', RAGResponse)


In [104]:
print(answer.answer[:100])
print(answer.confidence)
print(answer.confidence_explanation)
print(answer.answer_type)
print(answer.followup_questions)


The provided documentation does not contain any information on installing Kafka on Windows. Please r
0.0
The relevant information for installing Kafka on Windows is not mentioned in the provided context.
reference
['Where can I find the official Kafka installation guide?', 'What are the system requirements for Kafka on Windows?', 'Can you provide troubleshooting tips for installing Kafka?']


In [105]:
from pydantic import model_validator


# Payload for the "nothing found" branch of AnswerResponse.
class AnswerNotFound(BaseModel):
    explanation: str  # why no answer could be given (see the recorded Kafka output below)


class AnswerResponse(BaseModel):
    """
    If answer is found, 'answer' is populated.
    If no answer is found, 'answer_not_found' is populated.
    Only one of the two fields can be set at a time. Never both or neither.
    """

    # The docstring above and the field order below are kept verbatim: both
    # feed the JSON schema produced by model_json_schema().
    answer_not_found: Optional[AnswerNotFound] = None
    found_answer: bool
    answer: Optional[RAGResponse] = None

    @model_validator(mode="after")
    def check_consistency(self):
        # Enforce the either/or rule from the docstring: exactly one of the
        # two payload fields must be set.
        # NOTE(review): `found_answer` is not cross-checked against which
        # payload is present — confirm whether that is intended.
        has_answer = self.answer is not None
        has_not_found = self.answer_not_found is not None

        if has_answer and has_not_found:
            raise ValueError("Provide either 'answer' or 'answer_not_found', not both.")
        if not (has_answer or has_not_found):
            raise ValueError("Provide either 'answer' or 'answer_not_found'.")

        return self

In [107]:
AnswerResponse.model_json_schema()

{'$defs': {'AnswerNotFound': {'properties': {'explanation': {'title': 'Explanation',
     'type': 'string'}},
   'required': ['explanation'],
   'title': 'AnswerNotFound',
   'type': 'object'},
  'RAGResponse': {'description': 'This model provides a structured answer with metadata about the response,\nincluding confidence, categorization, and follow-up suggestions.',
   'properties': {'answer': {'description': "The main answer to the user's question in markdown",
     'title': 'Answer',
     'type': 'string'},
    'found_answer': {'description': 'True if relevant information was found in the documentation',
     'title': 'Found Answer',
     'type': 'boolean'},
    'confidence': {'description': 'Confidence score from 0.0 to 1.0 indicating how certain the answer is',
     'title': 'Confidence',
     'type': 'number'},
    'confidence_explanation': {'description': 'Explanation about the confidence level',
     'title': 'Confidence Explanation',
     'type': 'string'},
    'answer_type': 

In [110]:
answer = rag_structured('how do I install kafka on windows?', AnswerResponse)
answer

AnswerResponse(answer_not_found=AnswerNotFound(explanation='The context provided does not include any information about installing Kafka on Windows.'), found_answer=False, answer=None)

In [111]:
answer = rag_structured('how do I run llm evals?', AnswerResponse)
answer

AnswerResponse(answer_not_found=None, found_answer=True, answer=RAGResponse(answer='To run LLM evaluations, follow these steps:\n\n1. **Connect to Evidently Cloud**: Ensure you are connected and have created a project.\n   ```python\n   ws = CloudWorkspace(token="YOUR_API_TOKEN", url="https://app.evidently.cloud")\n   project = ws.create_project("Your Project Name", org_id="YOUR_ORG_ID")\n   project.save()\n   ```\n\n2. **Prepare Your Dataset**: Create a dataset with computed descriptors (like text length, sentence count). For example:\n   ```python\n   data = [\n       ["Question 1", "Response 1"],\n       ["Question 2", "Response 2"]\n   ]\n   ref_data = pd.DataFrame(data, columns=["question", "target_response"])\n   ref_dataset = Dataset.from_pandas(ref_data,\n       data_definition=DataDefinition(),\n       descriptors=[\n           TextLength("target_response", alias="Length"),\n           SentenceCount("target_response", alias="Sentence")\n       ])\n   ```\n\n3. **Run an Evaluat

In [116]:
from pydantic import ValidationError

# NOTE(review): `found_answer` has no default, so this call fails on the
# missing required field (see the recorded error below) and the message from
# `check_consistency` never appears. Pass `found_answer=...` to exercise the
# custom validator.
try:
    AnswerResponse()
except ValidationError as validation_error:
    print("Validation error:", validation_error, sep="\n")


Validation error:
1 validation error for AnswerResponse
found_answer
  Field required [type=missing, input_value={}, input_type=dict]
    For further information visit https://errors.pydantic.dev/2.12/v/missing
