In [0]:
%pip install -U -qqqq mlflow databricks-openai databricks-agents threadpoolctl==3.1.0
# The install above upgrades (-U) mlflow, databricks-openai and databricks-agents
# without a version pin, and pins threadpoolctl to 3.1.0.
# NOTE(review): unpinned upgrades mean a re-run can pick up different package
# versions — consider pinning once a known-good set is identified.
# Restart the Python process so the code below imports the packages installed above.
dbutils.library.restartPython()

In [0]:
# Notebook parameters exposed as text widgets: widget name -> default value.
widget_defaults = {
    "catalog_name": "btafur_catalog",
    "schema_name": "default",
    "model_name": "quickstart_agent",
    "logged_run_id": "None",
}
for widget_name, default_value in widget_defaults.items():
    dbutils.widgets.text(widget_name, default_value)

# Read the current widget values back, in the same order they were declared.
catalog_name, schema_name, model_name, logged_run_id = (
    dbutils.widgets.get(widget_name) for widget_name in widget_defaults
)

# The experiment path defaults to a per-user location derived from the user name
# in the notebook context, plus the model and catalog names read above.
notebook_context = dbutils.notebook.entry_point.getDbutils().notebook().getContext()
current_user = notebook_context.userName().get()
default_experiment_name = f"/Users/{current_user}/{model_name}_{catalog_name}"
dbutils.widgets.text("experiment_name", default_experiment_name)
experiment_name = dbutils.widgets.get("experiment_name")

In [0]:
import mlflow
import mlflow.genai.datasets
import time
from databricks.connect import DatabricksSession

# Fully qualified "<catalog>.<schema>.<model>" name used when registering the model.
registered_model_name = ".".join((catalog_name, schema_name, model_name))

# URI of the "agent" artifact logged under the run given by the `logged_run_id` widget.
model_uri = "runs:/{}/agent".format(logged_run_id)

# Make the configured experiment the active one for everything logged below.
mlflow.set_experiment(experiment_name)

In [0]:
# Load the labelled examples by table identifier instead of interpolating the
# widget values into a SQL statement: `spark.table` only accepts a table name,
# so a malformed or malicious parameter cannot turn into arbitrary SQL.
labelled_table_name = f"{catalog_name}.{schema_name}.labelled_sentences"

# The whole table is pulled to the driver as a pandas DataFrame; later cells
# read its "review" and "sentiment" columns.
df = spark.table(labelled_table_name).toPandas()
display(df)

In [0]:
import mlflow
from mlflow.genai.scorers import Guidelines, Correctness, RelevanceToQuery
import datetime

# Turn every labelled row into one evaluation record:
#   - "inputs" carries the review text under the key "content", which matches
#     the parameter name of the predict function defined later in this cell;
#   - "expectations" carries the label from the "sentiment" column, stringified.
eval_data = [
    {
        "inputs": {"content": row["review"]},
        "expectations": {"expected_response": str(row["sentiment"])},  # Adjust column name
    }
    for _, row in df.iterrows()
]

# Fully qualified name of the evaluation dataset, built once and reused below.
eval_dataset_name = f"{catalog_name}.{schema_name}.mlflow_eval_dataset"

# `mlflow_eval_dataset` stays None unless a usable dataset is obtained;
# `dataset_created` is always defined so later code can read it on either path.
mlflow_eval_dataset = None
dataset_created = False

try:
    # Try to get existing dataset. An existing dataset is used as-is:
    # `eval_data` is NOT merged into it.
    mlflow_eval_dataset = mlflow.genai.get_dataset(eval_dataset_name)
    print(f"✓ Using existing dataset: {eval_dataset_name}")

except Exception as get_error:
    # Any load failure is treated as "dataset missing"; report the underlying
    # error so permission or connectivity problems are not mistaken for absence.
    print(f"Dataset not found, creating dataset ({get_error})")

    try:
        # Create new dataset and fill it with the records built above.
        new_dataset = mlflow.genai.create_dataset(eval_dataset_name)
        new_dataset.merge_records(eval_data)

        # Publish the dataset only after the records were merged successfully,
        # so a failed merge never leaves an empty dataset to be evaluated.
        mlflow_eval_dataset = new_dataset
        dataset_created = True
        print(f"✓ Created new dataset: {eval_dataset_name}")

    except Exception as create_error:
        print(f"⚠ Could not create dataset {create_error}")

# Evaluation only runs when a dataset was loaded or created above.
if mlflow_eval_dataset is not None:

    # Judge instructions keyed by scorer name; each entry becomes one Guidelines scorer.
    guidelines = {
        "sentiment_accuracy": "Response must correctly identify sentiment",
        "clarity": ["Response must be clear and concise"],
    }

    # Load the logged agent once; `predict_function` closes over it.
    agent = mlflow.pyfunc.load_model(model_uri)

    def predict_function(content):
        """Send `content` to the agent as a single user message and return its prediction.

        The parameter name matches the "content" key of each record's "inputs".
        Any exception raised by the agent is printed and replaced by a
        placeholder response so that one failing row does not abort the
        whole evaluation.
        """
        try:
            return agent.predict({"messages": [{"role": "user", "content": content}]})
        except Exception as predict_error:
            print(f"Error in predict_fn: {predict_error}")
            return {"response": "PREDICTION_ERROR"}

    timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
    eval_run_name = f"sentiment_agent_evaluation_{timestamp}"

    # Resume the run that logged the model and record the evaluation as a
    # timestamped child run nested inside it.
    with mlflow.start_run(run_id=logged_run_id) as run, mlflow.start_run(run_name=eval_run_name, nested=True) as eval_run:
        # Correctness first, then one Guidelines scorer per entry above
        # (dict order: "sentiment_accuracy", then "clarity").
        eval_scorers = [Correctness()]
        eval_scorers.extend(
            Guidelines(name=scorer_name, guidelines=scorer_guidelines)
            for scorer_name, scorer_guidelines in guidelines.items()
        )
        results = mlflow.genai.evaluate(
            data=mlflow_eval_dataset,
            predict_fn=predict_function,
            scorers=eval_scorers,
        )

    # Print results
    print("Evaluation Results:", results.metrics, sep="\n")

In [0]:
from mlflow import MlflowClient
client = MlflowClient()

# Register the logged agent under the fully qualified model name. This happens
# regardless of the evaluation outcome; only the alias assignment below is gated.
registered_model = mlflow.register_model(model_uri, name=registered_model_name)

# Minimum mean "sentiment_accuracy" score required to receive the "Champion" alias.
CHAMPION_THRESHOLD = 0.9

# `results` only exists when the evaluation cell actually ran (it is skipped
# when no evaluation dataset could be loaded or created). Look it up
# defensively so a skipped evaluation does not end in a NameError or KeyError
# after the model has already been registered.
evaluation_results = globals().get("results")
evaluation_metrics = getattr(evaluation_results, "metrics", None) or {}
sentiment_accuracy = evaluation_metrics.get("sentiment_accuracy/mean")

if sentiment_accuracy is None:
    print("⚠ No 'sentiment_accuracy/mean' metric available; skipping champion transition")
elif sentiment_accuracy > CHAMPION_THRESHOLD:
    print("Transitioning to champion")
    client.set_registered_model_alias(registered_model_name, "Champion", registered_model.version)
else:
    print(f"sentiment_accuracy/mean={sentiment_accuracy} does not exceed {CHAMPION_THRESHOLD}; not transitioning to champion")