In [1]:
!pip install torch==2.3.1+cu121 torchvision==0.18.1+cu121 -f https://download.pytorch.org/whl/torch_stable.html

Defaulting to user installation because normal site-packages is not writeable
Looking in indexes: https://pypi.org/simple, https://pypi.ngc.nvidia.com
Looking in links: https://download.pytorch.org/whl/torch_stable.html

[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m23.3.1[0m[39;49m -> [0m[32;49m25.0.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip3 install --upgrade pip[0m


In [2]:
!pip install --upgrade onnxruntime-gpu==1.18.0 

Defaulting to user installation because normal site-packages is not writeable
Looking in indexes: https://pypi.org/simple, https://pypi.ngc.nvidia.com

[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m23.3.1[0m[39;49m -> [0m[32;49m25.0.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip3 install --upgrade pip[0m


In [3]:
import os
import shutil

import os
import zipfile
import urllib.request

In [4]:
import os
import onnxruntime as ort

# Expose only GPU 0 to this process.
# (To lift the restriction instead: os.environ.pop("CUDA_VISIBLE_DEVICES", None))
os.environ.update({"CUDA_VISIBLE_DEVICES": "0"})

# The GPU build of ONNX Runtime is required here, e.g.:
#   pip install --upgrade onnxruntime-gpu==1.18.0   # pick the version matching your CUDA 12.x stack

# List the execution providers this ONNX Runtime build offers and stop early
# if the CUDA provider is not among them.
prov = ort.get_available_providers()
print(prov)
assert "CUDAExecutionProvider" in prov, f"ORT not GPU-enabled. Providers={prov}"

['TensorrtExecutionProvider', 'CUDAExecutionProvider', 'AzureExecutionProvider', 'CPUExecutionProvider']


In [5]:
# Datasets directory and project name come from the DOMINO_* environment variables.
datasets_dir = os.environ["DOMINO_DATASETS_DIR"]
project_ds_folder = os.environ["DOMINO_PROJECT_NAME"]

# Base folder for this project's files: <datasets_dir>/<project name>.
download_base_folder = "/".join((datasets_dir, project_ds_folder))
models_folder = "models"

# Model under test and the file name of its ONNX export.
yolo_model_name = "yolov8n"
yolo_onnx_file_name = yolo_model_name + ".onnx"

In [6]:
# Evaluation helpers. The Ultralytics validation run is written under
# /tmp/ultra_runs/<name>, logged in full to MLflow, and then removed.

import json
import math
import os
import random
import shutil
import statistics as stats
import time
from datetime import datetime
from pathlib import Path
from typing import Any, Dict, List, Optional

import matplotlib.pyplot as plt
import mlflow
import numpy as np
import yaml
from PIL import Image
from ultralytics import YOLO

# Optional-dependency guard: record whether MLflow can be imported so that
# evaluate_model() can skip experiment logging when it is unavailable.
# NOTE(review): this cell also imports mlflow unconditionally a few lines above,
# so a missing package would already fail there and this fallback never
# triggers — confirm which behavior is intended.
try:
    import mlflow
    _HAS_MLFLOW = True
except Exception:
    _HAS_MLFLOW = False


def _ensure_dir(p: Path) -> Path:
    p.mkdir(parents=True, exist_ok=True)
    return p


def _latency_benchmark(model: YOLO, image_paths: List[Path], imgsz: int, device: str) -> Dict[str, Any]:
    if not image_paths:
        return {"p50_ms": None, "p90_ms": None, "p99_ms": None, "mean_ms": None, "count": 0}
    lat_ms = []
    _ = model.predict(source=str(image_paths[0]), imgsz=imgsz, device=device, verbose=False)  # warmup
    for p in image_paths:
        t0 = time.perf_counter()
        _ = model.predict(source=str(p), imgsz=imgsz, device=device, verbose=False)
        lat_ms.append((time.perf_counter() - t0) * 1000.0)
    lat_sorted = sorted(lat_ms)
    def pct(v, q):
        idx = min(len(v)-1, max(0, int(math.ceil(q*len(v))-1)))
        return v[idx]
    return {
        "p50_ms": pct(lat_sorted, 0.50),
        "p90_ms": pct(lat_sorted, 0.90),
        "p99_ms": pct(lat_sorted, 0.99),
        "mean_ms": float(stats.mean(lat_sorted)),
        "count": len(lat_sorted),
        "raw_ms": lat_ms,
    }


def ensure_mlflow_experiment(experiment_name: str, artifact_location: Optional[str] = None) -> str:
    """
    Ensure an MLflow experiment with the given name exists and make it current.

    The experiment is looked up by name and created when the lookup returns
    nothing; it is then set as the current experiment.

    Args:
        experiment_name: Name of the experiment.
        artifact_location: Optional path or URI where artifacts will be stored.
            Only used when the experiment is created by this call; an already
            existing experiment is left unchanged.

    Returns:
        The experiment ID (MLflow experiment IDs are strings).

    Raises:
        RuntimeError: If any of the MLflow calls fails. The original exception
            is attached as ``__cause__``.
    """
    try:
        exp = mlflow.get_experiment_by_name(experiment_name)
        if exp is None:
            exp_id = mlflow.create_experiment(
                experiment_name,
                artifact_location=artifact_location,
            )
        else:
            exp_id = exp.experiment_id
        mlflow.set_experiment(experiment_name)
        return exp_id
    except Exception as e:
        # Chain explicitly so the underlying MLflow error stays visible in tracebacks.
        raise RuntimeError(f"Failed to ensure experiment {experiment_name}: {e}") from e


def _cleanup_ultralytics_run(save_dir: Path, project_dir: Path) -> None:
    """Delete one Ultralytics run directory and, if that leaves it empty, its project directory."""
    if save_dir.exists():
        shutil.rmtree(save_dir, ignore_errors=True)
    if project_dir.exists() and not any(project_dir.iterdir()):
        shutil.rmtree(project_dir, ignore_errors=True)


def evaluate_model(
    base_path: str,
    model_path: str,
    imgsz: int = 640,
    device: str = "cpu",
    limit_images: int = 1000,
    subset_seed: int = 0,
    experiment_name: Optional[str] = None,
) -> Dict[str, Any]:
    """Validate a model on a random subset of ``<base_path>/images/val2017``.

    Steps:
      1. Pick a seeded random subset of the ``.jpg``/``.jpeg``/``.png`` images and
         write the list to ``<base_path>/artifacts/val_subset.txt``.
      2. Write ``<base_path>/coco_val_subset.yaml`` and run ``model.val`` with the
         run directory placed under ``/tmp/ultra_runs/<run_name>``.
      3. Write headline metrics, a latency benchmark on up to 100 subset images,
         a latency histogram and the evaluation config under
         ``<base_path>/artifacts``.
      4. If MLflow is available, log parameters, metrics, those artifacts and the
         whole Ultralytics run directory to a new run.
      5. Remove the ``/tmp`` run directory, whether or not the steps above succeeded.

    Args:
        base_path: Dataset root; must contain ``images/val2017``.
        model_path: Model file handed to ``YOLO(..., task="detect")``.
        imgsz: Image size forwarded to validation and prediction.
        device: Device string forwarded to validation and prediction.
        limit_images: Maximum number of images in the subset; must be >= 1.
        subset_seed: Seed for the subset shuffle.
        experiment_name: If given (and MLflow is available), the MLflow
            experiment to create/select before logging.

    Returns:
        Dict with ``map``, ``ap50``, ``ap75``, ``mean_precision``,
        ``mean_recall`` and ``evaluated_images``.

    Raises:
        ValueError: If ``limit_images`` is smaller than 1.
        FileNotFoundError: If the image directory does not exist.
        RuntimeError: If the image directory contains no matching images, or if
            the MLflow experiment cannot be ensured.
    """
    # A negative limit would silently slice from the end of the shuffled list
    # and 0 would produce an empty validation set, so reject both up front.
    if limit_images < 1:
        raise ValueError(f"limit_images must be >= 1, got {limit_images}")

    base = Path(base_path)
    img_dir = base / "images" / "val2017"
    if not img_dir.exists():
        raise FileNotFoundError(f"Missing image dir: {img_dir}")

    artifacts = _ensure_dir(base / "artifacts")
    plots_dir = _ensure_dir(artifacts / "plots")
    metrics_dir = _ensure_dir(artifacts / "metrics")
    config_dir = _ensure_dir(artifacts / "config")

    all_imgs = sorted(
        p for pattern in ("*.jpg", "*.jpeg", "*.png") for p in img_dir.glob(pattern)
    )
    if not all_imgs:
        raise RuntimeError(f"No images found under {img_dir}")
    # Sort first, then shuffle with a dedicated RNG, so the subset depends only on the seed.
    rng = random.Random(subset_seed)
    rng.shuffle(all_imgs)
    sub_imgs = all_imgs[:limit_images]

    subset_list = artifacts / "val_subset.txt"
    with open(subset_list, "w") as f:
        for p in sub_imgs:
            f.write(str(p.resolve()) + "\n")

    data_config = {
        'path': str(base),
        'train': 'images/val2017',
        'val': str(subset_list),
        'names': list(range(80))
    }
    yaml_path = base / "coco_val_subset.yaml"
    with open(yaml_path, "w") as f:
        yaml.dump(data_config, f)

    model = YOLO(model_path, task="detect")

    # Force the Ultralytics run dir into /tmp and give it a stable, timestamped name.
    tmp_project = Path("/tmp/ultra_runs")
    run_name = f"val_{Path(model_path).stem}_subset{len(sub_imgs)}_" \
               f"{datetime.now().strftime('%Y%m%d_%H%M%S')}"
    # Expected run dir; replaced by the path the validator reports once it has run.
    # Knowing it up front lets the cleanup below also cover a failed validation.
    save_dir = tmp_project / run_name

    headline_path = metrics_dir / "headline.json"
    latency_path = metrics_dir / "latency.json"
    eval_cfg_path = config_dir / "eval.json"
    hist_path = plots_dir / "latency_hist.png"

    try:
        val_res = model.val(
            data=str(yaml_path),
            imgsz=imgsz,
            device=device,
            save_json=False,
            verbose=False,
            project=str(tmp_project),   # /tmp base
            name=run_name,              # folder name
            exist_ok=True,              # don't auto-increment
        )
        save_dir = Path(val_res.save_dir)  # /tmp/ultra_runs/<run_name>

        metrics = {
            "map": float(val_res.box.map),
            "ap50": float(val_res.box.map50),
            "ap75": float(val_res.box.map75),
            "mean_precision": float(val_res.box.mp),
            "mean_recall": float(val_res.box.mr),
            "evaluated_images": len(sub_imgs)
        }
        headline_path.write_text(json.dumps(metrics, indent=2))

        latency = _latency_benchmark(model, sub_imgs[:min(100, len(sub_imgs))], imgsz=imgsz, device=device)
        # The raw per-image samples are only used for the histogram, not stored in the JSON.
        latency_path.write_text(
            json.dumps({k: v for k, v in latency.items() if k != "raw_ms"}, indent=2)
        )

        if latency.get("raw_ms"):
            fig, ax = plt.subplots()
            try:
                ax.hist(latency["raw_ms"], bins=20)
                ax.set_xlabel("Latency (ms)")
                ax.set_ylabel("Count")
                ax.set_title("Per-image latency (batch=1)")
                fig.tight_layout()
                fig.savefig(hist_path)
            finally:
                # Close even if saving fails so figures do not pile up in the kernel.
                plt.close(fig)

        eval_cfg = {
            "imgsz": imgsz,
            "device": device,
            "subset_seed": subset_seed,
            "limit_images": limit_images,
            "subset_list": str(subset_list),
            "model_path": str(model_path),
            "ultralytics_save_dir": str(save_dir),
        }
        eval_cfg_path.write_text(json.dumps(eval_cfg, indent=2))

        if _HAS_MLFLOW:
            if experiment_name:
                ensure_mlflow_experiment(experiment_name)
            with mlflow.start_run(run_name=run_name):
                mlflow.log_params({
                    "imgsz": imgsz, "device": device,
                    "limit_images": len(sub_imgs), "subset_seed": subset_seed
                })
                mlflow.log_metrics({
                    "map": metrics["map"], "ap50": metrics["ap50"], "ap75": metrics["ap75"],
                    "mean_precision": metrics["mean_precision"], "mean_recall": metrics["mean_recall"],
                    "latency_p50_ms": latency.get("p50_ms") or 0.0,
                    "latency_p90_ms": latency.get("p90_ms") or 0.0,
                    "latency_p99_ms": latency.get("p99_ms") or 0.0,
                    "latency_mean_ms": latency.get("mean_ms") or 0.0,
                })
                # Log our structured artifacts
                mlflow.log_artifact(str(headline_path))
                mlflow.log_artifact(str(latency_path))
                mlflow.log_artifact(str(eval_cfg_path))
                mlflow.log_artifact(str(subset_list))
                if hist_path.exists():
                    mlflow.log_artifact(str(hist_path))
                # Log the full Ultralytics run directory
                if save_dir.exists():
                    mlflow.log_artifacts(str(save_dir), artifact_path="ultralytics_run")
    finally:
        # Always remove the /tmp run directory (and the project dir if it is now
        # empty), with or without MLflow and regardless of where a failure occurred.
        _cleanup_ultralytics_run(save_dir, tmp_project)

    return metrics


In [7]:
# Full path of the ONNX model file inside the project folder.
model_path = "/".join([download_base_folder, models_folder, yolo_onnx_file_name])
print(model_path)

# COCO folder inside the project folder.
base_folder = download_base_folder + "/coco"


/mnt/data/reference-cv-model-comparison/models/yolov8n.onnx


In [9]:
domino_user_name = os.environ["DOMINO_USER_NAME"]
experiment_name = f"cv-comparison-{domino_user_name}"

# Evaluate the same model once on GPU ("0") and once on CPU. Results are kept
# per device so the GPU numbers are not overwritten by the CPU run.
metrics_by_device = {}
for eval_device in ("0", "cpu"):
    metrics_by_device[eval_device] = evaluate_model(
        base_folder,
        model_path,
        limit_images=50,
        experiment_name=experiment_name,
        device=eval_device,
    )

# `metrics` keeps its previous meaning: the result of the last (CPU) run.
metrics = metrics_by_device["cpu"]


Ultralytics 8.3.181 🚀 Python-3.8.10 torch-2.3.1+cu121 CUDA:0 (NVIDIA A10G, 22724MiB)
Loading /mnt/data/reference-cv-model-comparison/models/yolov8n.onnx for ONNX Runtime inference...
Using ONNX Runtime CUDAExecutionProvider
Setting batch=1 input of shape (1, 3, 640, 640)
[34m[1mval: [0mFast image access ✅ (ping: 0.7±0.1 ms, read: 110.5±49.7 MB/s, size: 168.9 KB)


[34m[1mval: [0mScanning /mnt/data/reference-cv-model-comparison/coco/labels/val2017... 50 images, 2 backgrounds, 0 corrupt: 100%|██████████| 50/50 [00:00<00:00, 665.16it/s]

[34m[1mval: [0mNew cache created: /mnt/data/reference-cv-model-comparison/coco/labels/val2017.cache



                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 50/50 [00:00<00:00, 87.80it/s]


                   all         50        564      0.528       0.53      0.585      0.446
Speed: 0.6ms preprocess, 4.5ms inference, 0.0ms loss, 1.8ms postprocess per image
Results saved to [1m/tmp/ultra_runs/val_yolov8n_subset50_20250820_142810[0m
Loading /mnt/data/reference-cv-model-comparison/models/yolov8n.onnx for ONNX Runtime inference...
Using ONNX Runtime CUDAExecutionProvider
Ultralytics 8.3.181 🚀 Python-3.8.10 torch-2.3.1+cu121 CPU (AMD EPYC 7R32)
Loading /mnt/data/reference-cv-model-comparison/models/yolov8n.onnx for ONNX Runtime inference...
Using ONNX Runtime CPUExecutionProvider
Setting batch=1 input of shape (1, 3, 640, 640)
[34m[1mval: [0mFast image access ✅ (ping: 0.0±0.0 ms, read: 101.4±41.7 MB/s, size: 150.3 KB)


[34m[1mval: [0mScanning /mnt/data/reference-cv-model-comparison/coco/labels/val2017.cache... 50 images, 2 backgrounds, 0 corrupt: 100%|██████████| 50/50 [00:00<?, ?it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 50/50 [00:21<00:00,  2.34it/s]


                   all         50        564      0.528      0.531      0.585      0.446
Speed: 5.2ms preprocess, 240.5ms inference, 0.0ms loss, 132.4ms postprocess per image
Results saved to [1m/tmp/ultra_runs/val_yolov8n_subset50_20250820_142821[0m
Loading /mnt/data/reference-cv-model-comparison/models/yolov8n.onnx for ONNX Runtime inference...
Using ONNX Runtime CPUExecutionProvider
