In [0]:
# %pip install -U -qqqq mlflow databricks-langchain databricks-agents uv langgraph==0.3.4
# dbutils.library.restartPython()

In [0]:
from databricks.agents import list_deployments, get_deployments, delete_deployment
import mlflow
from configs.project import get_project_config
from src.utils import set_mlflow_experiment

In [0]:
# Agent-specific settings are read from the development YAML; project-level
# settings (catalog, schema, ...) come from the project config helper.
multi_agent_config = mlflow.models.ModelConfig(
  development_config="../configs/rag_genie_agent.yaml",
)
projectConfig = get_project_config()

In [0]:
# The experiment name is the agent's configured name with an "_eval" suffix.
# NOTE(review): set_mlflow_experiment is defined in src.utils; presumably it
# creates/activates the experiment and returns it — confirm.
experiment = set_mlflow_experiment(
  multi_agent_config.get("databricks_resources").get("mlflow_experiment_name") + "_eval"
)

In [0]:
# Compose the model's dotted name as <catalog>.<schema>.<model> from the
# project config and the agent config.
model_name = multi_agent_config.get("databricks_resources").get("model_name")
UC_MODEL_NAME = "{}.{}.{}".format(
  projectConfig.uc_catalog,
  projectConfig.uc_schema,
  model_name,
)
print(UC_MODEL_NAME)

In [0]:
# Look up the deployment for a specific agent model name and version and
# expose its serving endpoint name as `endpoint_name` for the cells below.
agent_model_name = UC_MODEL_NAME  # Set to your Unity Catalog model name
agent_model_version = 1  # Set to your agent model version

# Take the first matching deployment, but fail with an actionable message
# (instead of a bare IndexError) when the model/version has not been deployed.
deployment = next(
  iter(get_deployments(model_name=agent_model_name, model_version=agent_model_version)),
  None,
)
if deployment is None:
  raise LookupError(
    f"No deployment found for {agent_model_name} version {agent_model_version}; "
    "deploy the agent first or adjust agent_model_version."
  )

# Read the attribute directly rather than via __dict__.get(), so a missing
# field raises here instead of silently passing None on to predict().
endpoint_name = deployment.endpoint_name
print(endpoint_name)

In [0]:
# import pyspark.sql.functions as F
# eval_sdf = spark.table(projectConfig.eval_tables.get("id_1").fqn)
# eval_dataset = eval_sdf.filter(F.col("request").contains("American Express"))
# display(eval_dataset)

In [0]:
import mlflow
from mlflow.deployments import get_deploy_client

# Guidelines, keyed by name, that are passed to the evaluator as
# "global_guidelines" and applied to every agent response.
global_guidelines = {
  "rejection": ["If the request is unrelated to company financials and reports, the response must be a rejection of the request"],
  # "conciseness": ["If the request is related to company financials and reports, the response must be concise"],
  # "api_code": ["If the request is related to company financials and reports and asks about an API, the response must have code"],
  "professional": ["The response must be professional."]
}

# Evaluation requests. The first uses the chat "messages" dict form; the
# second is a plain string.
# NOTE(review): the plain-string form relies on Agent Evaluation converting it
# to a messages dict before the model callable unpacks it with **request —
# confirm against the Databricks Agent Evaluation input schema.
eval_set = [{
  "request": {"messages": [{"role": "user", "content": "Was American Express able to retain card members during 2022?"}]}
}, {
  "request": "What was American Express revenue in 2022?",
}]

# Minimal agent wrapper: prepend a fixed system prompt and forward the chat
# to the serving endpoint.
@mlflow.trace(span_type="AGENT")
def my_agent(messages):
  """Query the endpoint in `endpoint_name` with a system prompt plus `messages`.

  Args:
    messages: Iterable of chat messages; they are appended, in order, after
      the system message. (Presumably {"role", "content"} dicts, as in the
      evaluation set — verify against callers.)

  Returns:
    The value returned by the Databricks deployment client's `predict` call.
  """
  system_text = """
    You are a chatbot that answers questions about company financials and reports.
    For requests unrelated to company financials and reports, reject the request.
  """
  client = get_deploy_client("databricks")
  # System message first, then the caller's messages unchanged.
  conversation = [{"role": "system", "content": system_text}]
  conversation.extend(messages)
  return client.predict(endpoint=endpoint_name, inputs={"messages": conversation})

# Run the agent over the evaluation set inside an MLflow run named
# "system_prompt", using the "databricks-agent" model type.
with mlflow.start_run(run_name="system_prompt") as run:
  # Evaluator settings: hand the global guidelines to the databricks-agent evaluator.
  agent_eval_settings = {"databricks-agent": {"global_guidelines": global_guidelines}}
  mlflow.evaluate(
    data=eval_set,
    # Each request is unpacked into keyword arguments for my_agent.
    model=lambda request: my_agent(**request),
    model_type="databricks-agent",
    evaluator_config=agent_eval_settings,
  )