## Update to model registry

Use the regular Python 3 kernel

In [8]:
# Names used for this notebook's MLflow bookkeeping:
# the model-registry entry, and the experiment under which runs are recorded.
REGISTERED_MODEL_NAME = "yolov8n_test"
EXPERIMENT_NAME = "yolov8n_new_xyz"

In [9]:
from pathlib import Path
import mlflow

# --------------------------------------------------------------
# Configuration (adjust to taste)
# --------------------------------------------------------------
# Local folder expected to hold model.onnx and config.pbtxt.
SRC_DIR = Path(f"models/{REGISTERED_MODEL_NAME}")

# Optional: point MLflow at a specific tracking server.
# mlflow.set_tracking_uri("http://your-mlflow-server:5000")

# --------------------------------------------------------------
# Fail fast when the bundle is incomplete
# --------------------------------------------------------------
if not SRC_DIR.exists():
    raise FileNotFoundError(f"Source directory not found: {SRC_DIR}")

onnx_path = SRC_DIR / "model.onnx"
config_path = SRC_DIR / "config.pbtxt"

# Checked in this order so the ONNX file is reported first when both are missing.
for label, required_file in (("ONNX file", onnx_path), ("config.pbtxt", config_path)):
    if not required_file.exists():
        raise FileNotFoundError(f"{label} not found: {required_file}")

print(f"Using source directory: {SRC_DIR.resolve()}")

# --------------------------------------------------------------
# Select (or create) the experiment, then log and register in one run
# --------------------------------------------------------------
mlflow.set_experiment(EXPERIMENT_NAME)

with mlflow.start_run(run_name="yolov8n-triton-bundle") as run:
    run_id = run.info.run_id
    # URI of the "model" artifact folder populated just below.
    model_uri = f"runs:/{run_id}/model"
    print(f"MLflow run_id: {run_id}")

    # Upload every file under SRC_DIR into the run's "model" artifact folder.
    mlflow.log_artifacts(str(SRC_DIR), artifact_path="model")
    print(f"Model artifacts logged at: {model_uri}")

    # Register that artifact folder under REGISTERED_MODEL_NAME.
    registration = mlflow.register_model(
        model_uri=model_uri,
        name=REGISTERED_MODEL_NAME,
    )

# Summary of what was registered; `registration` is reused by later cells.
print(
    "\n".join(
        [
            "========== Registration Complete ==========",
            f"Registered model name   : {REGISTERED_MODEL_NAME}",
            f"Registered model version: {registration.version}",
            f"Source run_id           : {run_id}",
            f"Source artifacts URI    : {model_uri}",
        ]
    )
)


Using source directory: /mnt/models/yolov8n_test
MLflow run_id: 0cad8fec91114af597541f845f23421a
Model artifacts logged at: runs:/0cad8fec91114af597541f845f23421a/model


Registered model 'yolov8n_test' already exists. Creating a new version of this model...
2025/12/07 21:56:27 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: yolov8n_test, version 2
Created version '2' of model 'yolov8n_test'.


🏃 View run yolov8n-triton-bundle at: http://127.0.0.1:8765/#/experiments/5/runs/0cad8fec91114af597541f845f23421a
🧪 View experiment at: http://127.0.0.1:8765/#/experiments/5
Registered model name   : yolov8n_test
Registered model version: 2
Source run_id           : 0cad8fec91114af597541f845f23421a
Source artifacts URI    : runs:/0cad8fec91114af597541f845f23421a/model


In [10]:
# ==========================
# Config – edit as needed
# ==========================
# Registry version that the download and deploy cells below operate on.
# By default this is the version created by the registration cell above;
# assign a literal instead (e.g. MODEL_VERSION = 1) to pull an older version.
MODEL_VERSION = registration.version

In [3]:
## Download from model registry

In [11]:
from pathlib import Path
import shutil
import tempfile
import mlflow

# Root folder under which the final layout is written:
#   <OUTPUT_ROOT>/<model name>/config.pbtxt
#   <OUTPUT_ROOT>/<model name>/<version>/<onnx file>
OUTPUT_ROOT = Path("downloaded-models")

# Optional: point MLflow at a specific tracking server.
# mlflow.set_tracking_uri("http://your-mlflow-server:5000")

# ==========================
# Build MLflow model URI
# ==========================
model_uri = f"models:/{REGISTERED_MODEL_NAME}/{MODEL_VERSION}"
print(f"Downloading artifacts from: {model_uri}")

model_root_dir = OUTPUT_ROOT / REGISTERED_MODEL_NAME
version_dir = model_root_dir / str(MODEL_VERSION)

# Download into a scratch directory that is removed when the block exits, so
# repeated runs do not leave full copies of the model behind in the temp area.
with tempfile.TemporaryDirectory() as scratch_dir:
    local_dir = Path(
        mlflow.artifacts.download_artifacts(artifact_uri=model_uri, dst_path=scratch_dir)
    )
    print(f"Downloaded to temp dir: {local_dir}")

    # ==========================
    # Find config.pbtxt and .onnx
    # ==========================
    # Sorted so the "first match" does not depend on filesystem ordering.
    config_files = sorted(local_dir.rglob("config.pbtxt"))
    onnx_files = sorted(local_dir.rglob("*.onnx"))

    if not config_files:
        raise FileNotFoundError(f"No config.pbtxt found under {local_dir}")
    if not onnx_files:
        raise FileNotFoundError(f"No .onnx files found under {local_dir}")

    config_src = config_files[0]
    onnx_src = onnx_files[0]
    if len(onnx_files) > 1:
        # Only one file is copied; make the choice visible instead of silent.
        print(f"Warning: {len(onnx_files)} .onnx files found; using {onnx_src}")

    print(f"Found config: {config_src}")
    print(f"Found ONNX  : {onnx_src}")

    # ==========================
    # Build desired output layout
    # ==========================
    # parents=True also creates OUTPUT_ROOT and the model folder.
    version_dir.mkdir(parents=True, exist_ok=True)

    config_dest = model_root_dir / "config.pbtxt"
    onnx_dest = version_dir / onnx_src.name  # keep same filename (e.g., model.onnx)

    # ==========================
    # Copy files into place
    # ==========================
    shutil.copy2(config_src, config_dest)
    shutil.copy2(onnx_src, onnx_dest)

# NOTE: local_dir / config_src / onnx_src now point into the removed scratch
# directory; only their names are used from here on.
print("=========== Done ===========")
print(f"Config copied to : {config_dest}")
print(f"ONNX copied to   : {onnx_dest}")
print()
print("Final structure:")
print(str(OUTPUT_ROOT))
print(f"  └─ {REGISTERED_MODEL_NAME}/")
print("       ├─ config.pbtxt")
print(f"       └─ {MODEL_VERSION}/")
print(f"            └─ {onnx_src.name}")


Downloading artifacts from: models:/yolov8n_test/2


Downloading artifacts:   0%|          | 0/3 [00:00<?, ?it/s]

Downloaded to temp dir: /tmp/tmp2a_d5m1b
Found config: /tmp/tmp2a_d5m1b/config.pbtxt
Found ONNX  : /tmp/tmp2a_d5m1b/model.onnx
Config copied to : downloaded-models/yolov8n_test/config.pbtxt
ONNX copied to   : downloaded-models/yolov8n_test/2/model.onnx

Final structure:
downloaded-models
  └─ yolov8n_test/
       ├─ config.pbtxt
       └─ 2/
            └─ model.onnx


## Copy to the shared EDV to deploy to Triton

In [18]:
from pathlib import Path
import shutil
import os
# -----------------------------
# Config – edit these
# -----------------------------

# Local folder that already contains <model name>/config.pbtxt and <model name>/<version>/.
SOURCE_ROOT = Path("./downloaded-models")
# Mounted volume that receives the copy.
TARGET_ROOT = Path("/domino/edv/domino-inference-test-triton-inference-server-pvc")
# Alternative target:
#TARGET_ROOT = Path("/domino/edv/triton-management-rw-dev-pvc")


# -----------------------------
# Build paths
# -----------------------------
src_config_file = SOURCE_ROOT / REGISTERED_MODEL_NAME / "config.pbtxt"
dst_config_file = TARGET_ROOT / REGISTERED_MODEL_NAME / "config.pbtxt"
src_version_dir = SOURCE_ROOT / REGISTERED_MODEL_NAME / str(MODEL_VERSION)
dst_version_dir = TARGET_ROOT / REGISTERED_MODEL_NAME / str(MODEL_VERSION)


# Validate every source before touching the target, so a missing input does
# not leave a partially created model folder on the shared volume.
if not src_version_dir.exists():
    raise FileNotFoundError(f"Source version directory not found: {src_version_dir}")
if not src_config_file.exists():
    raise FileNotFoundError(f"Source config.pbtxt not found: {src_config_file}")

# Create destination root
dst_version_dir.mkdir(parents=True, exist_ok=True)

def copy_file_simple(src: Path, dst: Path):
    """Copy the bytes of ``src`` to ``dst`` without copying file metadata.

    Only file contents are transferred (no permissions or timestamps), which
    is intended for S3/FUSE-style mounts. Missing parent directories of
    ``dst`` are created, and an existing ``dst`` is overwritten.

    Args:
        src: Existing file to read.
        dst: Destination file path.

    Raises:
        OSError: If ``src`` cannot be opened or ``dst`` cannot be written
            (``FileNotFoundError`` when ``src`` does not exist).
    """
    dst.parent.mkdir(parents=True, exist_ok=True)
    with src.open("rb") as fsrc, dst.open("wb") as fdst:
        shutil.copyfileobj(fsrc, fdst)
    # Report success only after both handles are closed: a write error that
    # surfaces at close time must not be preceded by a "Copied" message.
    print(f"Copied {src} -> {dst}")

# Copy config.pbtxt next to the version folder.
copy_file_simple(src_config_file, dst_config_file)

# -----------------------------
# Recursively copy the version folder
# -----------------------------
for root, dirs, files in os.walk(src_version_dir):
    root_path = Path(root)
    # Mirror the position of the current folder underneath the destination.
    current_dst_root = dst_version_dir / root_path.relative_to(src_version_dir)

    # Create sub-folders up front so empty directories are mirrored too.
    for d in dirs:
        (current_dst_root / d).mkdir(parents=True, exist_ok=True)

    # Copy file contents only (no metadata).
    for f in files:
        copy_file_simple(root_path / f, current_dst_root / f)

print("========== Done ==========")
print("Copied version folder:")
print(f"  {src_version_dir}  →  {dst_version_dir}")

Copied downloaded-models/yolov8n_test/config.pbtxt to /domino/edv/domino-inference-test-triton-inference-server-pvc/yolov8n_test/config.pbtxt
Copied downloaded-models/yolov8n_test/2/model.onnx to /domino/edv/domino-inference-test-triton-inference-server-pvc/yolov8n_test/2/model.onnx
Copied version folder:
  downloaded-models/yolov8n_test/2  →  /domino/edv/domino-inference-test-triton-inference-server-pvc/yolov8n_test/2


## Test

In [None]:
# Pinned client dependencies for the test cells that follow.
# %pip (rather than !pip) installs into the environment of the running kernel.
# opencv-python is deliberately NOT installed on top of opencv-python-headless:
# both packages provide the same `cv2` module and the OpenCV packaging notes say
# to install only one of them in an environment.
%pip install --no-cache-dir \
    "grpcio==1.67.1" \
    "grpcio-tools==1.67.1" \
    "protobuf==5.28.3" \
    "tritonclient[grpc]==2.61.0" \
    "numpy==2.2.6" \
    "requests==2.32.4" \
    "opencv-python-headless==4.10.0.84"

In [None]:
import os
os.environ["MM_ADDR"] = "triton-inference-server-proxy.domino-inference-test.svc.cluster.local:50051"
os.environ["VIDEO_PATH"] = "./samples/video.avi"
os.environ["MODEL_NAME"] = "yolov8n_new"
os.environ["MODEL_VERSION"] = "1"
os.environ["INPUT_NAME"] = "images"
os.environ["OUTPUT_NAMES"] = "output0"
os.environ["IMG_SIZE"] = "640"
os.environ["PARSE_NUMPY"] = "1"

!python3 /mnt/src/mm_client.py

In [None]:
import requests
import os

# Endpoint that scale_inference_server() posts to.
ADMIN_API_URL = "http://triton-inference-server-admin.domino-inference-test.svc.cluster.local:8000/v1/deployments/inference-server/scale"

# Authentication is supplied by the caller as a headers dict. Use whichever
# header your API expects (for example "Authorization: ApiKey <key>" if your
# Domino instance requires it).

# Default request body: ask for zero replicas.
payload = {"replicas": 0}

# --- CALL ENDPOINT --------------------------------------------------

def scale_inference_server(payload, AUTH_HEADER):
    """POST a scale request to ADMIN_API_URL and print the outcome.

    Args:
        payload: JSON-serialisable request body, e.g. ``{"replicas": 1}``.
        AUTH_HEADER: Dict of HTTP headers sent with the request (carries the
            authentication header).

    Returns:
        None. On a non-200 response the status code and body are printed;
        on success the JSON response is pretty-printed.
    """
    # Imported here so the function does not depend on a later cell having
    # imported json before it is called.
    import json

    resp = requests.post(
        ADMIN_API_URL,
        headers=AUTH_HEADER,
        json=payload,
        timeout=10,
    )

    if resp.status_code != 200:
        print("❌ Error scaling deployment")
        print("Status:", resp.status_code)
        print("Body:", resp.text)
        return

    print("✅ Successfully scaled deployment:")
    print(json.dumps(resp.json(), indent=2))



In [None]:
import json
# Request body for this call: scale to one replica.
payload = {"replicas": 1}

# Fetch a token from the API proxy. Fail loudly on a bad response instead of
# sending an error body as the bearer token, and bound the wait with a timeout.
token_response = requests.get(
    os.environ["DOMINO_API_PROXY"] + "/access-token",
    timeout=10,
)
token_response.raise_for_status()
# Strip surrounding whitespace: a trailing newline is not valid in a header value.
token = token_response.text.strip()

AUTH_HEADER = {"Authorization": f"Bearer {token}"}
scale_inference_server(payload, AUTH_HEADER)

In [None]:
import requests
import os
# Quick connectivity check: the cell displays the Response object returned by the
# token endpoint. The timeout keeps the cell from hanging if the proxy is unreachable.
requests.get(os.environ['DOMINO_API_PROXY'] + "/access-token", timeout=10)