In [None]:
import os
import jwt
import requests
import shutil
import sys

import mlflow.sklearn
from mlflow.client import MlflowClient
from mlflow.tracking.request_header.registry import _request_header_provider_registry
import mlflow
import string
from mlflow.tracking.request_header.abstract_request_header_provider import RequestHeaderProvider

In [None]:

class DominoApiKeyRequestHeaderProvider(RequestHeaderProvider):
    """
    Request header provider that forwards the Domino user API key.

    The key is read once, when the provider is constructed, from the
    DOMINO_USER_API_KEY environment variable and is sent as the
    X-Domino-Api-Key header.
    """

    def __init__(self):
        # os.getenv yields None when the variable is absent.
        self._domino_api_key = os.getenv("DOMINO_USER_API_KEY")

    def in_context(self):
        """Return True when an API key was found at construction time."""
        return self._domino_api_key is not None

    def request_headers(self):
        """Return the header mapping; it is empty when no API key is available."""
        if self._domino_api_key is None:
            return {}
        return {"X-Domino-Api-Key": self._domino_api_key}


class DominoExecutionRequestHeaderProvider(RequestHeaderProvider):
    """
    Provides X-Domino-Execution request header based on DOMINO_RUN_ID environment variable.

    The run id is read once, when the provider is constructed, and wrapped in
    an HS256-signed JWT whose only claim is ``execution_id``. When
    DOMINO_RUN_ID is unset or empty no token is built, ``in_context()`` is
    False and ``request_headers()`` returns an empty dict.
    """

    def __init__(self):
        run_id = os.getenv("DOMINO_RUN_ID")
        print(f"run-id{run_id}")
        if run_id:
            # NOTE(review): the signing key is the literal "secret". Confirm what
            # the receiving service expects before relying on this signature.
            self._domino_execution = jwt.encode({"execution_id": run_id}, "secret", algorithm="HS256")
        else:
            # Without this guard a token carrying execution_id=None would be
            # encoded and the provider would always report itself in context.
            self._domino_execution = None

    def in_context(self):
        """Return True when a token was built from DOMINO_RUN_ID."""
        return self._domino_execution is not None

    def request_headers(self):
        """Return the X-Domino-Execution header, or an empty dict when there is no token."""
        request_headers = {}

        if self._domino_execution is not None:
            request_headers["X-Domino-Execution"] = self._domino_execution

        return request_headers

In [None]:
def set_mlflow_tracking_uri():
    """
    Point MLflow at the server named by the MLFLOW_TRACKING_URI environment variable.

    Raises:
        SystemExit: with code 1 (after printing a message) when the variable
            is unset or empty.
    """
    mlflow_tracking_uri = os.getenv("MLFLOW_TRACKING_URI")
    if not mlflow_tracking_uri:
        print("MLFLOW_TRACKING_URI environment variable must be set.")
        # Raise SystemExit directly rather than calling `exit(1)`: `exit` is the
        # interactive helper injected by `site`, and IPython replaces it with its
        # own object, so it cannot be relied on to stop execution here.
        raise SystemExit(1)
    mlflow.set_tracking_uri(mlflow_tracking_uri)


def register_domino_api_key_request_header_provider():
    """
    Register DominoApiKeyRequestHeaderProvider with MLflow's request header registry.

    Raises:
        SystemExit: with code 1 (after printing a message) when
            DOMINO_USER_API_KEY is unset or empty.
    """
    if not os.getenv("DOMINO_USER_API_KEY"):
        print("DOMINO_USER_API_KEY environment variable must be set.")
        # Raise SystemExit directly rather than calling `exit(1)`: `exit` is the
        # interactive helper injected by `site`, and IPython replaces it with its
        # own object, so it cannot be relied on to stop execution here.
        raise SystemExit(1)
    # Set X-Domino-Api-Key request header
    _request_header_provider_registry.register(DominoApiKeyRequestHeaderProvider)


def register_domino_execution_request_header_provider():
    """
    Register DominoExecutionRequestHeaderProvider with MLflow's request header registry.

    Raises:
        SystemExit: with code 1 (after printing a message) when DOMINO_RUN_ID
            is unset or empty.
    """
    if not os.getenv("DOMINO_RUN_ID"):
        print("DOMINO_RUN_ID environment variable must be set.")
        # Raise SystemExit directly rather than calling `exit(1)`: `exit` is the
        # interactive helper injected by `site`, and IPython replaces it with its
        # own object, so it cannot be relied on to stop execution here.
        raise SystemExit(1)
    # Set X-Domino-Execution request header
    _request_header_provider_registry.register(DominoExecutionRequestHeaderProvider)



In [None]:
## This is an emulation.

# This process needs a Domino API key (exported as DOMINO_USER_API_KEY). However, service
# accounts do not have an API key, so we generate a temporary one using a token.
# Do not store this token. Regenerate the key each time you need to run as the service
# account (SA). This also means that you cannot run multiple such processes in parallel.
import requests
import os

def regenerate_api_key_for_sa(mlflow_tracking_uri, sa_id, sa_token, verify=False, timeout=30):
    """
    Ask the server to regenerate the API key of account ``sa_id``.

    Sends an empty JSON POST to ``<mlflow_tracking_uri>/account/<sa_id>/regenerateApiKey``
    authenticated with ``sa_token`` as a bearer token.

    Args:
        mlflow_tracking_uri: Base URL of the server; a trailing "/" is ignored.
        sa_id: Id of the account whose key is regenerated.
        sa_token: Token placed in the ``Authorization: Bearer`` header.
        verify: Passed to ``requests.post``. Defaults to False to preserve the
            original behaviour.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The parsed JSON response body. This notebook reads the new key from
        its "raw" field.

    Raises:
        requests.HTTPError: if the server answers with a 4xx/5xx status.
    """
    headers = {
        "Authorization": f"Bearer {sa_token}",
        "Content-Type": "application/json",
    }
    # Avoid a double slash when the caller's URI already ends with "/".
    base_url = mlflow_tracking_uri.rstrip("/")
    url = f"{base_url}/account/{sa_id}/regenerateApiKey"
    print(url)
    # NOTE(review): verify=False sends the bearer token without checking the
    # server certificate. Pass verify=True (or a CA bundle path) outside of demos.
    result = requests.post(url, headers=headers, json={}, verify=verify, timeout=timeout)
    # Fail here with the HTTP status instead of handing an error payload to the caller.
    result.raise_for_status()
    return result.json()






In [None]:
import base64
import json

def decode_jwt(token: str):
    """
    Split a compact JWT into its three dot-separated segments and decode the first two.

    The signature is NOT verified; it is returned exactly as it appears in the token.

    Returns:
        dict with "header" and "payload" (both parsed from JSON) and
        "signature" (the raw base64url text).

    Raises:
        ValueError: if the token does not consist of exactly three segments.
    """
    def _segment_to_json(segment):
        # base64url segments drop their "=" padding; restore it so the length
        # is a multiple of four before decoding.
        missing = -len(segment) % 4
        decoded_bytes = base64.urlsafe_b64decode(segment + "=" * missing)
        return json.loads(decoded_bytes)

    raw_header, raw_payload, raw_signature = token.split(".")
    return {
        "header": _segment_to_json(raw_header),
        "payload": _segment_to_json(raw_payload),
        "signature": raw_signature,
    }



In [None]:
def get_oauth_token(timeout=30):
    """
    Fetch an access token from ``$DOMINO_API_PROXY/access-token``.

    The current user's token is used here as a stand-in for a service account token.

    Args:
        timeout: Seconds to wait for the proxy before giving up.

    Returns:
        The response body with surrounding whitespace removed.

    Raises:
        KeyError: if DOMINO_API_PROXY is not set.
        requests.HTTPError: if the proxy answers with a 4xx/5xx status.
    """
    domino_api_proxy = os.environ['DOMINO_API_PROXY']

    response = requests.get(f"{domino_api_proxy}/access-token", timeout=timeout)
    # Without this check an error page would be returned as if it were a token.
    response.raise_for_status()
    # Strip stray whitespace so the value can be used in an Authorization header.
    return response.text.strip()

In [None]:
from domino import Domino
def get_hw_tier_id(name: str):
    """
    Look up a hardware tier by name via ``$DOMINO_API_PROXY/v4/hardwareTier``.

    Despite the function's name, the whole tier record is returned, not only its id.

    Returns:
        The first entry of the response's "hardwareTiers" list whose "name"
        equals ``name``, or None when there is no such entry.
    """
    base_url = os.environ['DOMINO_API_PROXY']
    response = requests.get(f"{base_url}/v4/hardwareTier")
    tiers = response.json()['hardwareTiers']
    return next((tier for tier in tiers if tier['name'] == name), None)

def get_environment_id(name: str):
    """
    Look up a compute environment by name via ``$DOMINO_API_PROXY/v4/environments/self``.

    Despite the function's name, the whole environment record is returned;
    callers read its "id" field themselves.

    Returns:
        The first entry of the response list whose "name" equals ``name``,
        or None when there is no such entry.
    """
    base_url = os.environ['DOMINO_API_PROXY']
    response = requests.get(f"{base_url}/v4/environments/self")
    environments = response.json()
    return next((env for env in environments if env['name'] == name), None)


In [None]:
## We first configure the MLFLOW_TRACKING_URI. Replace the URL below with your own deployment's.
external_mlflow_tracking_uri="https://marcdo77364.cs.domino.tech"
os.environ['MLFLOW_TRACKING_URI']=external_mlflow_tracking_uri

# Fetch a token, read the account id from its "sub" claim, and use both to
# obtain a temporary API key that is exported as DOMINO_USER_API_KEY.
sa_token = get_oauth_token()
decoded = decode_jwt(sa_token)
sa_id=decoded['payload']['sub']
api_key = regenerate_api_key_for_sa(external_mlflow_tracking_uri,sa_id,sa_token)
os.environ['DOMINO_USER_API_KEY']=api_key["raw"]
# Do not echo the key itself: cell output is saved with the notebook and
# would leak the credential to anyone who can read the file.
print("DOMINO_USER_API_KEY has been set (value not shown).")




In [None]:
domino = Domino("wadkars/ddl-end-to-end-demo",auth_token=get_oauth_token())

# get_environment_id returns None when no environment has this name. Fail with
# a clear message instead of "'NoneType' object is not subscriptable".
compute_env_name = 'Domino Standard Environment Py3.10 R4.4'
compute_env = get_environment_id(compute_env_name)
if compute_env is None:
    raise LookupError(f"Compute environment not found: {compute_env_name!r}")
compute_env_id = compute_env['id']

title = f"External MLFLOW simulation for {os.environ['DOMINO_STARTING_USERNAME']}"
j = domino.job_start(title=title,command="/tmp/ls",
                 hardware_tier_name="Small",environment_id=compute_env_id)
# The started job's id is exported as DOMINO_RUN_ID, which
# DominoExecutionRequestHeaderProvider reads when it is constructed.
os.environ['DOMINO_RUN_ID']=j['id']
print(os.environ['DOMINO_RUN_ID'])

In [None]:
# Wire MLflow up to Domino. Order matters: the environment variables consumed
# below (MLFLOW_TRACKING_URI, DOMINO_USER_API_KEY, DOMINO_RUN_ID) must already
# be set, otherwise the corresponding call prints a message and exits.
#
# clear() empties the registry's private `_registry` container first.
# NOTE(review): presumably this keeps re-runs of this cell from stacking
# duplicate providers. It relies on a private MLflow attribute; confirm it
# still exists when upgrading MLflow.
_request_header_provider_registry._registry.clear() #This clears any existing values
set_mlflow_tracking_uri()
register_domino_api_key_request_header_provider()
register_domino_execution_request_header_provider()

In [None]:
from typing import Optional, Iterable, Union
import pandas as pd
import numpy as np

def load_xgb_pyfunc_version(model_name: str, version: Union[int, str]) -> mlflow.pyfunc.PyFuncModel:
    """
    Fetch one version of a registered model through MLflow's generic PyFunc loader.

    Despite the "xgb" in the name, nothing here is XGBoost-specific: the model
    URI ``models:/<model_name>/<version>`` is handed to ``mlflow.pyfunc.load_model``.

    Example URI: models:/my_model/3
    """
    return mlflow.pyfunc.load_model("models:/{}/{}".format(model_name, version))


def predict_with_pyfunc(model: mlflow.pyfunc.PyFuncModel,
                        X: Union[pd.DataFrame, np.ndarray, Iterable[Iterable[float]]]) -> np.ndarray:
    """
    Score ``X`` with a PyFunc model and return the predictions as a numpy array.

    ``X`` is used as-is when it is already a pandas DataFrame; any other input
    is first wrapped with ``pd.DataFrame(X)``. The frame passed to the model is
    printed before prediction.
    """
    frame = X if isinstance(X, pd.DataFrame) else pd.DataFrame(X)
    print(frame)
    # np.asarray normalises whatever the model returns into an ndarray.
    return np.asarray(model.predict(frame))

In [None]:
# The registered model name is derived from the user who started the workspace.
my_name = os.environ['DOMINO_STARTING_USERNAME']
exp_model_name = "xgb-demo-" + my_name

# With the tracking URI and request headers configured, load the newest
# version of the registered model from Domino's MLflow.
pyfunc_model = load_xgb_pyfunc_version(model_name=exp_model_name, version="latest")

# One random row with 30 features; the column count must match the training data.
sample = np.random.rand(1, 30)
preds = predict_with_pyfunc(model=pyfunc_model, X=sample)
print("PyFunc preds:", preds)