# RAG Deployment

## Using Databricks managed Secrets

We need a Databricks secret to securely manage access to our model. We'll configure this with an access token stored in a Databricks secret scope (created and managed through the Databricks CLI), so the token value stays redacted and never appears in the notebook. For more information on Databricks Secrets, refer to the [documentation](https://docs.databricks.com/en/security/secrets/index.html).

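Secret scope: general <br/>
Secrets: workstation (access token, exposed to the endpoint as `DATABRICKS_TOKEN`), host (exposed to the endpoint as `DATABRICKS_HOST`)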

In [0]:
%pip install -U --quiet databricks-sdk==0.28.0 mlflow==2.12.1

# Restart the Python process so the package versions installed above are the ones
# picked up by the imports in the following cells.
dbutils.library.restartPython()

In [0]:
import mlflow
from mlflow import MlflowClient

# Three-part name of the registered RAG model; the first two parts match the
# catalog ("ademianczuk") and schema ("myfixit") used elsewhere in this notebook.
model_name = "ademianczuk.myfixit.virtual_assistant_rag_app"

# Point MLflow at the Unity Catalog model registry
mlflow.set_registry_uri("databricks-uc")

def get_latest_model_version(model_name_in: str = None):
    """
    Return the highest registered version of a model, or None if it has none.

    Args:
        model_name_in: Name of the registered model to search for.

    Returns:
        The largest version among the model's registered versions, compared
        numerically and returned in the same form the registry reports it,
        or None when the search yields no versions.
    """
    client = MlflowClient()
    model_version_infos = client.search_model_versions(f"name = '{model_name_in}'")
    if not model_version_infos:
        return None

    # Compare as integers: the registry may report versions as strings, and a
    # plain max() over strings is lexicographic (it would rank "9" above "10").
    return max(
        (model_version_info.version for model_version_info in model_version_infos),
        key=int,
    )

In [0]:
# Resolve the newest registered version of the model named above.
latest_model_version = get_latest_model_version(model_name)

# Fail fast when no version was found. RuntimeError is used instead of a bare
# BaseException so that ordinary `except Exception` handlers can still catch it.
if not latest_model_version:
    raise RuntimeError("Error: Model not created, verify if the named model was properly compiled and registered in unity catalog.")

print(f"Model created and logged to: {model_name}/{latest_model_version}")

In [0]:
from databricks.sdk.service.serving import EndpointCoreConfigInput

# Environment variables handed to the served model; the values are
# {{secrets/<scope>/<key>}} references rather than literal credentials.
_serving_env_vars = {
    "DATABRICKS_TOKEN": "{{secrets/general/workstation}}",
    "DATABRICKS_HOST": "{{secrets/general/host}}",
}

# The single model version this endpoint serves
_served_model = {
    "model_name": model_name,
    "model_version": latest_model_version,
    "scale_to_zero_enabled": True,
    "workload_size": "Small",
    "environment_vars": _serving_env_vars,
}

# Catalog, schema and table-name prefix for the endpoint's auto-capture tables
_auto_capture = {
    "catalog_name": "ademianczuk",
    "schema_name": "myfixit",
    "table_name_prefix": "rag_app_realtime",
}

# Assemble the full endpoint configuration and convert it to the SDK input type
endpoint_config_dict = {
    "served_models": [_served_model],
    "auto_capture_config": _auto_capture,
}

endpoint_config = EndpointCoreConfigInput.from_dict(endpoint_config_dict)

In [0]:
from databricks.sdk import WorkspaceClient

# Initiate the workspace client
w = WorkspaceClient()
serving_endpoint_name = "myfixit_va_endpoint"

# Look the endpoint up by name among all serving endpoints; None means it does not exist yet
existing_endpoint = next(
    (e for e in w.serving_endpoints.list() if e.name == serving_endpoint_name), None
)

# Workspace host name taken from the notebook context tags, used only for the messages below.
# NOTE(review): "browserHostName" is presumably only set in interactive sessions — confirm
# before running this notebook as a job.
db_host = dbutils.notebook.entry_point.getDbutils().notebook().getContext().tags().get("browserHostName").value()
serving_endpoint_url = f"{db_host}/ml/endpoints/{serving_endpoint_name}"

print(db_host)
print(serving_endpoint_url)

if existing_endpoint is None:
    # The endpoint doesn't exist yet: create it with the full configuration
    print(f"Creating the endpoint {serving_endpoint_url}, this will take a few minutes to package and deploy the endpoint...")
    w.serving_endpoints.create_and_wait(name=serving_endpoint_name, config=endpoint_config)
else:
    # The endpoint exists: update it to serve the new version.
    # Only the served models are sent here; auto_capture_config is not re-sent.
    print(f"Updating the endpoint {serving_endpoint_url} to version {latest_model_version}, this will take a few minutes to package and deploy the endpoint...")
    w.serving_endpoints.update_config_and_wait(served_models=endpoint_config.served_models, name=serving_endpoint_name)

displayHTML(f'Your Model Endpoint Serving is now available. Open the <a href="/ml/endpoints/{serving_endpoint_name}">Model Serving Endpoint page</a> for more details.')

In [0]:
# Send a sample question to the endpoint through the Databricks SDK client
question = "What is the safest way to remove a phone battery?"
answer = w.serving_endpoints.query(
    name=serving_endpoint_name,
    inputs=[{"query": question}],
)
print(answer.predictions)

In [0]:
from mlflow.deployments import get_deploy_client

# Send the same question to the endpoint through the MLflow Deployments client
deploy_client = get_deploy_client("databricks")
_request_body = {"inputs": [{"query": question}]}
response = deploy_client.predict(endpoint=serving_endpoint_name, inputs=_request_body)
print(response.predictions)