In [13]:
from openai import OpenAI

In [14]:
from google import genai

In [15]:
from dotenv import load_dotenv

In [16]:
import pandas as pd

In [17]:
import toml

In [18]:
from rich.console import Console
from rich.markdown import Markdown

In [19]:
from pydantic import BaseModel

In [20]:
# Single rich Console instance; the notebook's custom print helper writes through it.
CONSOLE = Console()

In [21]:
def print(text):
    """Render ``text`` as Markdown on the module-level rich console.

    NOTE: this definition shadows the builtin ``print`` for every cell that
    runs after it, so all later ``print(...)`` calls go through rich.

    Args:
        text: str, Markdown source to display.
    """
    rendered = Markdown(text)
    CONSOLE.print(rendered)

In [41]:
def print_questions(user_qs):
    """Print each entry of ``user_qs.queries`` in order, one ``print`` call per entry.

    Args:
        user_qs: UserQueries, an object exposing an iterable ``queries`` attribute.
    """
    question_list = user_qs.queries
    for question in question_list:
        print(question)

In [22]:
# Load API keys from the Streamlit secrets file. The path is relative, so it
# resolves against the kernel's current working directory.
parsed_toml = toml.load('../.streamlit/secrets.toml')

In [23]:
# NOTE(review): openai_key is not referenced in any of the visible cells —
# confirm it is still needed.
openai_key = parsed_toml['OPENAI_API_KEY']

In [24]:
# Gemini client built with the key from the secrets file; every
# generate_content call in this notebook goes through it.
gclient = genai.Client(api_key=parsed_toml['GEMINI_API_KEY'])

In [25]:
# Free-form request (no response schema): ask the model for likely Fiddler user personas.
# NOTE(review): `resposne` is a misspelling of `response`. The next cell reads the
# same misspelled name, so any rename has to change both cells together.
resposne = gclient.models.generate_content(
    model="gemini-2.5-flash",
    contents="Give me a list of user personas who would be using Fiddler for AI/ML Monitoring and observability"
)

In [26]:
# Render the reply text as Markdown; `print` here is the rich-backed helper
# defined earlier in the notebook, not the builtin.
print(resposne.text)

In [27]:
# Structured-output schema with a single field, `queries`, holding a list of strings.
# It is passed as "response_schema" to the generate_content calls further down.
# Presumably `response.parsed` is then an instance of this class — TODO confirm.
class UserQueries(BaseModel):
    queries: list[str]

In [49]:
# Prompt template used to generate synthetic FiddleBot user queries.
# Filled with str.format; placeholders:
#   {num_questions} - how many queries to ask the model for
#   {condition}     - one of the *_CONDITION strings describing the simulated user
# NOTE(review): "FiddleBot has access the following capabilities" is missing "to",
# and the text mixes "Fiddlebot"/"FiddleBot". The string is left untouched here
# because it is sent verbatim to the model.
QS_TEMPLATE = """Give me a list of {num_questions} queries that a user would want to ask FiddleBot about their models and projects in Fiddler.

Fiddler has the following capabilities:
- Monitor performance of ML models and generative AI applications
- Protect your LLM and GenAI applications with Guardrails
- Analyze model behavior to identify issues and opportunities
- Improve AI systems through actionable insights

Implementing Fiddler ML/LLM monitoring requires just three steps:
1. Onboard your ML/LLM application to Fiddler by defining its inputs, outputs, and related metadata
2. Publish your application data to Fiddler, typically the "digital exhaust" from your model serving platform
3. Monitor performance through dashboards and alerts that track the metrics most important to your use case

Fiddler automatically handles the complex work of generating metrics, detecting anomalies, and providing the visualizations you need to maintain high-quality ML applications.

Fiddlebot aims to be a chat based UI to the Fiddler platform, where users can ask the bot to fetch information for them. It is limited to the preprod environment on Fiddler, so please keep that in mind.
This is also an unoptimized version and the aim of having this internal dry-run is to ascertain where and how the agent is failing, along with usage patterns. This will better inform us when it comes to optimizing performance.
There are 3 major components working under the hood.
- Chatbot : Chat UI
- Plan n Solve : Certain queries require multiple steps to be resolved. The PnS module aims to first generate a plan and solve each step.
- MCP Server : Model Context Protocol Server. A set of tools that the agent can utilise in solving for the query. Runs the Fiddler python client under the hood.

Please keep in mind that the bot is still a work in progress and failure is expected. The aim of this exercise is to catch how the agent fails when it is faced with ambiguous queries, queries with incomplete parameters (ex asking for model schema without providing project) and queries which are beyond the scope of the agent.

FiddleBot has access the following capabilities via tools:
- list all projects in fiddler
- list all models in a project
- get model schema
- get model specs
- list alert rules for a model
- list triggered alerts for a rule
- list all custom metrics for a model

{condition}
"""

In [50]:
# Value for QS_TEMPLATE's {condition} slot: a new user asking vague, incomplete questions.
AMBIGUOUS_CONDITION = """The user is new to Fiddler and is not aware of its capabilities and what to ask for.
Their questions are not specific and ambiguous and incomplete.
"""

In [51]:
# Value for QS_TEMPLATE's {condition} slot: a user asking questions with conflicting requirements.
# NOTE(review): not referenced by any of the visible cells — confirm whether a
# generation cell for this condition is missing.
CONFLICTING_CONDITION = """The user wants to test the Fiddler expert and asks questions with conflicting information and requirements"""

In [52]:
# Value for QS_TEMPLATE's {condition} slot: an experienced user asking specific, unambiguous questions.
GOOD_CONDITION = """The user is familiar with Fiddler and is aware of its capabilities and what to ask for.
The questions that the user asks are specific and clear. There is no ambiguity in the questions.
"""

In [56]:
# Ask for 40 questions under AMBIGUOUS_CONDITION, requesting JSON output that
# follows the UserQueries schema.
response = gclient.models.generate_content(
    model="gemini-2.5-flash",
    contents=QS_TEMPLATE.format(num_questions=40, condition=AMBIGUOUS_CONDITION),
    config={
        "response_mime_type": "application/json",
        "response_schema": UserQueries
    }
)

In [57]:
# Parsed structured result of the ambiguous-question request.
# NOTE(review): `queries` is not referenced again in the visible cells.
queries = response.parsed

In [58]:
# Display the ambiguous questions, one Markdown render per query.
print_questions(response.parsed)

In [61]:
# Ask for 40 questions under GOOD_CONDITION (clear, specific user), requesting
# JSON output that follows the UserQueries schema.
good_response = gclient.models.generate_content(
    model="gemini-2.5-flash",
    contents=QS_TEMPLATE.format(num_questions=40, condition=GOOD_CONDITION),
    config={"response_mime_type": "application/json", "response_schema": UserQueries},
)
# Read from good_response, not `response`: `response` still holds the
# AMBIGUOUS_CONDITION batch, so using it would re-print the ambiguous questions.
good_qs = good_response.parsed
print_questions(good_qs)

In [None]:
# NOTE(review): this unexecuted cell repeats the GOOD_CONDITION request from the
# cell before it. CONFLICTING_CONDITION is defined but never used in the visible
# notebook — confirm whether this cell was meant to use it, or delete the duplicate.
good_response = gclient.models.generate_content(
    model="gemini-2.5-flash",
    contents=QS_TEMPLATE.format(num_questions=40, condition=GOOD_CONDITION),
    config={"response_mime_type": "application/json", "response_schema": UserQueries},
)
# Read from good_response, not `response`: `response` still holds the
# AMBIGUOUS_CONDITION batch, so using it would re-print the ambiguous questions.
good_qs = good_response.parsed
print_questions(good_qs)