In [1]:
from openai import OpenAI

openai_client = OpenAI()

In [2]:
import json

from gitsource import GithubRepositoryDataReader, chunk_documents
from minsearch import AppendableIndex


reader = GithubRepositoryDataReader(
    repo_owner="evidentlyai",
    repo_name="docs",
    allowed_extensions={"md", "mdx"},
)
files = reader.read()

parsed_docs = [doc.parse() for doc in files]
chunked_docs = chunk_documents(parsed_docs, size=3000, step=1500)

index = AppendableIndex(
    text_fields=["title", "description", "content"],
    keyword_fields=["filename"]
)
index.fit(chunked_docs)

<minsearch.append.AppendableIndex at 0x240b47d1160>

In [3]:
def search(query):
    results = index.search(
        query=query,
        num_results=5
    )
    return results

search_tool = {
    "type": "function",
    "name": "search",
    "description": "Search the documentation database for relevant results based on a query string.",
    "parameters": {
        "type": "object",
        "properties": {
            "query": {
                "type": "string",
                "description": "The search query to look up in the index"
            }
        },
        "required": [
            "query"
        ]
    }
}

In [17]:
question = "How do I create a dahsbord in Evidently?"

In [18]:
class Agent:

    def __init__(self, llm_client, model, instructions, tools, output_type=None):
        self.llm_client = llm_client
        self.model = model
        self.instructions = instructions
        self.tools = tools

    def make_call(self, tool_call):
        arguments = json.loads(tool_call.arguments)
        name = tool_call.name
    
        if name == 'search':
            result = search(**arguments)
        elif name == 'add_entry':
            result = add_entry(**arguments)
        else:
            result = 'not found tool "{name}"'
        
        return {
            "type": "function_call_output",
            "call_id": tool_call.call_id,
            "output": json.dumps(result),
        }   

    def loop(self, user_prompt, message_history=None):
        if not message_history:
            message_history = [
                {"role": "system", "content": self.instructions},
            ]
            
        message_history.append({"role": "user", "content": user_prompt})

        iteration_number = 0
    
        while True:
            response = self.llm_client.responses.create(
                model=self.model,
                input=message_history,
                tools=self.tools,
            )
        
            print(f'iteraration number {iteration_number}...') 
            message_history.extend(response.output)
        
            has_function_calls = False
        
            for message in response.output:
                if message.type == 'function_call':
                    print(f'executing {message.name}({message.arguments})...')
                    tool_call_output = self.make_call(message)
                    message_history.append(tool_call_output)
                    has_function_calls = True
        
                if message.type == 'message':
                    text = message.content[0].text
                    print('ASSISTANT:', text)
        
            iteration_number = iteration_number + 1
            print()
            
            if not has_function_calls:
                break

        return message_history

In [19]:
from typing import Literal
from pydantic import BaseModel, Field


class RAGResponse(BaseModel):
    """
    This model provides a structured answer with metadata about the response,
    including confidence, categorization, and follow-up suggestions.
    """

    answer: str = Field(description="The main answer to the user's question in markdown")
    found_answer: bool = Field(description="True if relevant information was found in the documentation")
    confidence: float = Field(description="Confidence score from 0.0 to 1.0 indicating how certain the answer is")
    confidence_explanation: str = Field(description="Explanation about the confidence level")
    answer_type: Literal["how-to", "explanation", "troubleshooting", "comparison", "reference"] = Field(description="The category of the answer")
    followup_questions: list[str] = Field(description="Suggested follow-up questions the user might want to ask")


In [22]:
instructions = """
You're a documentation assistant. 

Answer the user question using the documentation knowledge base

Make 3 iterations:

1) in the first iteration, perform one search
2) in the second interation, analyze the results from the previous search
   and perform 2 more searches
3) synthesise the results into the output

Stop after 3 iterations

Use only facts from the knowledge base when answering.
If you cannot find the answer, inform the user.

Our knowledge base is entirely about Evidently, so you don't need to 
include the word 'evidently' in search results
"""

In [69]:
class SimpleStructuredAgent:

    def __init__(self, llm_client, model, instructions, tools, output_type=None):
        self.llm_client = llm_client
        self.model = model
        self.instructions = instructions
        self.tools = tools
        self.output_type = output_type

    def make_call(self, tool_call):
        arguments = json.loads(tool_call.arguments)
        name = tool_call.name
    
        if name == 'search':
            result = search(**arguments)
        elif name == 'add_entry':
            result = add_entry(**arguments)
        else:
            result = 'not found tool "{name}"'
        
        return {
            "type": "function_call_output",
            "call_id": tool_call.call_id,
            "output": json.dumps(result),
        }   

    def loop(self, user_prompt, message_history=None):
        if not message_history:
            message_history = [
                {"role": "system", "content": self.instructions},
            ]
            
        message_history.append({"role": "user", "content": user_prompt})

        iteration_number = 0
    
        while True:
            if output_type:
                response = self.llm_client.responses.parse(
                    model=self.model,
                    input=message_history,
                    tools=self.tools,
                    text_format=self.output_type
                )
            else:
                response = self.llm_client.responses.create(
                    model=self.model,
                    input=message_history,
                    tools=self.tools,
                )
        
            print(f'iteraration number {iteration_number}...') 
            message_history.extend(response.output)
        
            has_function_calls = False
        
            for message in response.output:
                if message.type == 'function_call':
                    print(f'executing {message.name}({message.arguments})...')
                    tool_call_output = self.make_call(message)
                    message_history.append(tool_call_output)
                    has_function_calls = True
        
                if message.type == 'message':
                    text = message.content[0].text
                    print('ASSISTANT:', text)
        
            iteration_number = iteration_number + 1
            print()
            
            if not has_function_calls:
                break

        return message_history

In [70]:
simple_agent = SimpleStructuredAgent(
    llm_client=OpenAI(),
    model='gpt-4o-mini',
    instructions=instructions,
    tools=[search_tool],
    output_type=RAGResponse
)

In [71]:
messages = simple_agent.loop('evidently adshboards') 

NameError: name 'output_type' is not defined

In [26]:
class Agent:

    def __init__(self, llm_client, model, instructions, tools):
        self.llm_client = llm_client
        self.model = model
        self.instructions = instructions
        self.tools = tools

    def make_call(self, tool_call):
        arguments = json.loads(tool_call.arguments)
        name = tool_call.name
    
        if name == 'search':
            result = search(**arguments)
        elif name == 'add_entry':
            result = add_entry(**arguments)
        else:
            result = 'not found tool "{name}"'
        
        return {
            "type": "function_call_output",
            "call_id": tool_call.call_id,
            "output": json.dumps(result),
        }   

    def loop(self, user_prompt, message_history=None):
        if not message_history:
            message_history = [
                {"role": "system", "content": self.instructions},
            ]
            
        message_history.append({"role": "user", "content": user_prompt})

        iteration_number = 0
    
        while True:
            response = self.llm_client.responses.create(
                model=self.model,
                input=message_history,
                tools=self.tools,
            )
        
            print(f'iteraration number {iteration_number}...') 
            message_history.extend(response.output)
        
            has_function_calls = False
        
            for message in response.output:
                if message.type == 'function_call':
                    print(f'executing {message.name}({message.arguments})...')
                    tool_call_output = self.make_call(message)
                    message_history.append(tool_call_output)
                    has_function_calls = True
        
                if message.type == 'message':
                    text = message.content[0].text
                    print('ASSISTANT:', text)
        
            iteration_number = iteration_number + 1
            print()
            
            if not has_function_calls:
                break

        return message_history

In [28]:
instructions = """
You're a documentation assistant. 

Answer the user question using the documentation knowledge base

Make 3 iterations:

1) in the first iteration, perform one search
2) in the second interation, analyze the results from the previous search
   and perform 2 more searches
3) synthesise the results into the output

IMPORTANT: at each step, give an explanation of why you want to perform 
search for this particular search query. It should be 2-3 sentences explaining
the logic of your decision.

Use only facts from the knowledge base when answering.
If you cannot find the answer, inform the user.

Our knowledge base is entirely about Evidently, so you don't need to 
include the word 'evidently' in search results
"""

In [29]:
agent = Agent(
    llm_client=OpenAI(),
    model='gpt-4o-mini',
    instructions=instructions,
    tools=[search_tool],
)

In [30]:
messages = agent.loop('evidently adshboards') 

iteraration number 0...
ASSISTANT: I will start by searching for "dashboards" to understand the available features related to this topic. This term is broad and will help identify specific tools, functionalities, or components related to dashboards that may be useful for your query.

Let's perform the search now.
executing search({"query":"dashboards"})...

iteraration number 1...
ASSISTANT: The search results provided a wealth of information about how dashboards function within the system. They describe how to create and manage dashboards, add panels, and organize data through tabs. Key highlights include:

1. **Dashboard Overview**: A dashboard is essential for visualizing evaluation results over time and tracking the performance of an AI application.

2. **Panels and Tabs**: You can add multiple panels to a dashboard to display various metrics, and you can organize these panels into different tabs for better clarity.

3. **Adding Panels**: Panels can be added both through a user int

In [36]:
response = openai_client.responses.parse(
    model=agent.model,
    input=messages,
    text_format=RAGResponse
)

rag_response = response.output_parsed

In [38]:
class StructuredAgent1(Agent):

    def __init__(self, llm_client, model, instructions, tools, output_type):
        super().__init__(llm_client, model, instructions, tools)
        self.output_type = output_type
        
    def structured_loop(self, user_prompt, message_history=None):
        message_history = self.loop(user_prompt, message_history)
        response = self.llm_client.responses.parse(
            model=self.model,
            input=message_history,
            text_format=self.output_type
        )
    
        output = response.output_parsed
        return message_history, output
    

In [None]:
structure_agent_1 = StructuredAgent1(
    llm_client=OpenAI(),
    model='gpt-4o-mini',
    instructions=instructions,
    tools=[search_tool],
    output_type=RAGResponse
)

_, output = structure_agent_1.structured_loop('bashboards')

In [42]:
print(output.answer)

### Overview of Dashboards
- **What is a Dashboard?**: A dashboard provides a clear view of your AI application performance, helping track evaluation results across multiple experiments or live production quality over time.
- **Tabs**: Each project has its own dashboard, organized into tabs, which can be added or customized, providing a logical structure to your panels.

### Creating and Managing Panels
1. **Panel Types**: You can add several types of panels such as:
    - **Text Panels**: For displaying titles or notes.
    - **Counter Panels**: For showing metrics with optional text.
    - **Charts**: Including pie charts, bar plots, and line plots to visualize data.

2. **Adding Panels**: There are two main ways to add panels:
    - **User Interface**: Enter "Edit" mode, click "Add Panel," and follow prompts to configure.
    - **Python API**: Using methods to programmatically create panels as per your requirements.

3. **Configuration Options**: While adding a panel, you can specif

In [43]:
def structure_result(result: RAGResponse):
    return 'ok'

In [None]:
schema = RAGResponse.model_json_schema()
schema["type"] = "object"
schema["additionalProperties"] = False

structure_result_tool = {
    "type": "function",
    "name": "structure_result",
    "description": "Call this function when you're ready to show the final result as structured data.",
    "strict": True,
    "parameters": schema,
}

In [52]:
response = openai_client.responses.create(
    model=agent.model,
    input=messages,
    tools=[structure_result_tool]
)

In [60]:
result_json = json.loads(response.output[0].arguments)
result = RAGResponse.model_validate(result_json)

In [62]:
def create_fake_tool(output_type, name="structure_result"):
    schema = output_type.model_json_schema()
    schema["type"] = "object"
    schema["additionalProperties"] = False
    
    structure_result_tool = {
        "type": "function",
        "name": name,
        "description": "Call this function when you're ready to show the final result as structured data.",
        "strict": True,
        "parameters": schema,
    }

    return structure_result_tool

In [66]:
class StructuredAgent2:

    FAKE_TOOL_NAME = 'structure_result'
    
    def __init__(self, llm_client, model, instructions, tools, output_type):
        self.llm_client = llm_client
        self.model = model
        self.instructions = instructions
        self.tools = tools + [create_fake_tool(output_type, self.FAKE_TOOL_NAME)]
        self.output_type = output_type

    def make_call(self, tool_call):
        arguments = json.loads(tool_call.arguments)
        name = tool_call.name
    
        if name == 'search':
            result = search(**arguments)
        elif name == 'add_entry':
            result = add_entry(**arguments)
        else:
            result = 'not found tool "{name}"'
        
        return {
            "type": "function_call_output",
            "call_id": tool_call.call_id,
            "output": json.dumps(result),
        }   

    def loop(self, user_prompt, message_history=None):
        if not message_history:
            message_history = [
                {"role": "system", "content": self.instructions},
            ]
            
        message_history.append({"role": "user", "content": user_prompt})

        iteration_number = 0
    
        while True:
            response = self.llm_client.responses.create(
                model=self.model,
                input=message_history,
                tools=self.tools,
            )
        
            print(f'iteraration number {iteration_number}...') 
            message_history.extend(response.output)
        
            output = None

            for message in response.output:
                if message.type == 'function_call':
                    print(f'executing {message.name}({message.arguments})...')
                    if message.name == self.FAKE_TOOL_NAME:
                        output_ready = True
                        output_json = json.loads(message.arguments)
                        output = self.output_type.model_validate(output_json)
                        continue

                    tool_call_output = self.make_call(message)
                    message_history.append(tool_call_output)
        
                if message.type == 'message':
                    text = message.content[0].text
                    print('ASSISTANT:', text)
        
            iteration_number = iteration_number + 1
            print()
            
            if output:
                break

        return message_history, output

In [67]:
structure_agent_2 = StructuredAgent2(
    llm_client=OpenAI(),
    model='gpt-4o-mini',
    instructions=instructions,
    tools=[search_tool],
    output_type=RAGResponse
)

_, output = structure_agent_2.loop('bashboards')

iteraration number 0...
ASSISTANT: It seems like you might be looking for information related to dashboards. Since this is a broad topic, I want to perform an initial search to see if there's any relevant documentation regarding dashboards, their creation, features, or usage. This will help me understand the context and narrow down the search in the subsequent steps.

I'll proceed with the first search for "dashboards."
executing search({"query":"dashboards"})...

iteraration number 1...
ASSISTANT: The first search returned several relevant documents related to dashboards, including how to add and manage dashboard panels, the role of dashboards in tracking evaluation results, and ways to organize them using tabs. This foundational information indicates that dashboards are central to visualizing evaluation results and organizing data.

Now, to refine my understanding and gather more specific information, I will conduct two additional searches: one for "dashboard panels" to learn more ab

In [68]:
print(output.answer)

### Overview of Dashboards
1. **What are Dashboards?**
   - Dashboards provide a visual representation of your evaluation results, allowing you to track performance over time across various projects and experiments. Each project starts with an empty dashboard that you can populate by adding evaluation reports.

2. **Creating and Managing Tabs**
   - You can add multiple tabs to organize your panels logically within the dashboard. This can be done either through the UI by entering "Edit" mode and using the "+" button, or programmatically using the Python API with methods like `project.dashboard.add_tab("TabName")`.

3. **Adding Panels**
   - Panels are the individual visual elements of a dashboard that display metrics. You can add various panel types such as counters, line plots, bar charts, and pie charts. Each panel must reference a specific metric within the reports logged to the project.

### Detailed Information on Dashboard Panels
- **Panel Types**: You can create text panels for 