In [0]:
# Short model name; other cells in this notebook also use it as the name of
# the serving endpoint.
model_name = 'slack-bot-rag'

# Dotted, three-part registry name under which the model is registered.
model_coordinates = f'users.antoine_amend.{model_name}'

In [0]:
# Resolve the notebook context once and read both the API URL and the API
# token from it. `dbutils` is not defined in this notebook; presumably it is
# injected by the Databricks notebook runtime.
notebook_context = dbutils.notebook.entry_point.getDbutils().notebook().getContext()
databricksURL = notebook_context.apiUrl().getOrElse(None)
myToken = notebook_context.apiToken().getOrElse(None)

# Fail fast: without this check a missing value would be silently formatted
# into the URL ('None/api/...') or the header ('Bearer None') and only show up
# later as an obscure HTTP error.
if databricksURL is None or myToken is None:
  raise RuntimeError('Could not retrieve the API URL and/or API token from the notebook context')

# Bearer-token header and the serving-endpoints REST URL used by later cells.
header = {'Authorization': 'Bearer {}'.format(myToken)}
endpoint = '/api/2.0/serving-endpoints'
url = f'{databricksURL}{endpoint}'

In [0]:
import pandas as pd

# Sample conversation used as the model's input example: one row per message,
# as [speaker, message text], with the latest human question in the last row.
history = [
  ['HUMAN', 'Hello, how are you?'],
  ['BOT', 'I am doing great, what about yourself?'],
  ['HUMAN', 'Thanks for asking. Can you help me with a question?'],
  # Escaped apostrophe: the previous '\s' was an invalid escape sequence that
  # left a literal backslash in the text.
  ['BOT', 'Sure, what\'s up?'],
  ['HUMAN', 'What is Lorem Ipsum?']
]

# Two string columns: who spoke ('user') and what they said ('text').
model_input = pd.DataFrame(history, columns=['user', 'text'])

In [0]:
import mlflow
from mlflow.pyfunc import PythonModel

class DummySlackModel(mlflow.pyfunc.PythonModel):
  """Placeholder pyfunc model that always returns the same canned answer.

  The conversation passed in is not inspected, so any input (including an
  empty frame) yields the same single-row response.
  """

  def predict(self, context, model_input):
    """Return a fixed answer together with a list of source links.

    Args:
      context: Unused.
      model_input: Conversation history. The input example elsewhere in this
        notebook uses 'user' and 'text' columns with the latest human question
        in the last row; a real implementation would presumably read the
        question from there. This placeholder ignores it.

    Returns:
      A one-row pandas DataFrame with columns 'answer' (str) and 'links'
      (list of URL strings).
    """
    # Imported inside the method so the method does not rely on a
    # module-level pandas import.
    import pandas as pd

    answer = 'Lorem Ipsum is simply dummy text of the printing and typesetting industry. Lorem Ipsum has been the industry\'s standard dummy text ever since the 1500s, when an unknown printer took a galley of type and scrambled it to make a type specimen book. It has survived not only five centuries, but also the leap into electronic typesetting, remaining essentially unchanged. It was popularised in the 1960s with the release of Letraset sheets containing Lorem Ipsum passages, and more recently with desktop publishing software like Aldus PageMaker including versions of Lorem Ipsum.'
    links = [
      'https://www.lipsum.com/',
      'https://loremipsum.io/'
    ]
    return pd.DataFrame([[answer, links]], columns=['answer', 'links'])

In [0]:
from mlflow.models.signature import infer_signature
# Run the dummy model once on the sample conversation so the signature can be
# inferred from a concrete input frame and a concrete output frame.
python_model = DummySlackModel()
model_output = python_model.predict(None, model_input)  # context passed as None
model_signature = infer_signature(model_input, model_output)

# Last expression of the cell: display the inferred signature.
model_signature

inputs: 
  ['user': string, 'text': string]
outputs: 
  ['answer': string, 'links': string]
params: 
  None

In [0]:
# Log the pyfunc model, with its signature and input example, under the
# 'model' artifact path of a new MLflow run. `run` stays bound after the
# `with` block; the registration cell reads run.info.run_id from it.
with mlflow.start_run() as run:
  mlflow.pyfunc.log_model(
    python_model=python_model,
    artifact_path='model',
    input_example=model_input,
    signature=model_signature,
  )



Uploading artifacts:   0%|          | 0/6 [00:00<?, ?it/s]

In [0]:
from mlflow.tracking import MlflowClient
# Point MLflow at the 'databricks-uc' registry (Unity Catalog, per the MLflow
# documentation) before registering anything.
mlflow.set_registry_uri('databricks-uc')
client = MlflowClient()

# Register the model logged by the run above as a new version ...
latest_model = mlflow.register_model(
  model_uri=f'runs:/{run.info.run_id}/model',
  name=model_coordinates,
)

# ... and move the 'production' alias onto that version.
client.set_registered_model_alias(
  name=model_coordinates,
  alias='production',
  version=latest_model.version,
)

Registered model 'users.antoine_amend.slack-bot-rag' already exists. Creating a new version of this model...


Downloading artifacts:   0%|          | 0/6 [00:00<?, ?it/s]

Uploading artifacts:   0%|          | 0/6 [00:00<?, ?it/s]

2023/11/24 16:39:05 INFO mlflow.tracking._model_registry.client: Waiting up to 300 seconds for model version to finish creation. Model name: users.antoine_amend.slack-bot-rag, version 3
Created version '3' of model 'users.antoine_amend.slack-bot-rag'.


In [0]:
import requests
import json

# Request body for creating a serving endpoint named after the model, serving
# the model version that was just registered.
payload = {
  'name': model_name,
  'config': {
    'served_models': [{
      'model_name': model_coordinates,
      'model_version': latest_model.version,
      'workload_size': 'Small',
      'scale_to_zero_enabled': True
    }]
  }
}

# Explicit timeout: without one, requests waits indefinitely on an
# unresponsive server and the cell would hang.
x = requests.post(url, json=payload, headers=header, timeout=60)

# Pretty-print the JSON reply; fall back to the raw body when the server
# answers with something that is not JSON (e.g. a proxy error page), so the
# real failure is shown instead of a JSON decoding traceback.
try:
  print(json.dumps(json.loads(x.text), indent=2))
except ValueError:
  print(x.text)

# Surface HTTP failures explicitly. This deliberately does not raise, so
# re-running the notebook against an already-existing endpoint still completes.
if not x.ok:
  print(f'Endpoint creation request failed with HTTP status {x.status_code}')

{
  "name": "slack-bot-rag",
  "creator": "antoine.amend@databricks.com",
  "creation_timestamp": 1700843945000,
  "last_updated_timestamp": 1700843945000,
  "state": {
    "ready": "NOT_READY",
    "config_update": "IN_PROGRESS"
  },
  "pending_config": {
    "start_time": 1700843945000,
    "served_models": [
      {
        "name": "slack-bot-rag-3",
        "model_name": "users.antoine_amend.slack-bot-rag",
        "model_version": "3",
        "workload_size": "Small",
        "scale_to_zero_enabled": true,
        "workload_type": "CPU",
        "state": {
          "deployment": "DEPLOYMENT_CREATING",
          "deployment_state_message": "Creating resources for served model."
        },
        "creator": "antoine.amend@databricks.com",
        "creation_timestamp": 1700843945000
      }
    ],
    "config_version": 1,
    "traffic_config": {
      "routes": [
        {
          "served_model_name": "slack-bot-rag-3",
          "traffic_percentage": 100,
          "served_enti