In [1]:
import os
import shutil
import zipfile
import urllib.request
import utilities

## Configure dataset and model directories

- `datasets_dir` → Root directory provided by Domino for all mounted datasets (`DOMINO_DATASETS_DIR`).
- `project_ds_folder` → Current project name, pulled from the environment (`DOMINO_PROJECT_NAME`). This assumes the project's default dataset is mounted under a folder with the same name as the project and is writable by the user running the notebook; adjust the path if your dataset is named or mounted differently.
- `download_base_folder` → Project-scoped path under the datasets mount, ensuring all artifacts are written in a reproducible, project-local namespace.
- `models_folder` → Subdirectory dedicated to storing ONNX exports (`models/`).

Together, these variables define the canonical storage location for exported models within the Domino project context.

In [2]:
# Both values come from the environment; indexing os.environ directly means a
# missing variable fails immediately with KeyError instead of producing a bad path.
datasets_dir = os.environ["DOMINO_DATASETS_DIR"]
project_ds_folder = os.environ["DOMINO_PROJECT_NAME"]

# Name of the subfolder (under download_base_folder) that receives model exports.
models_folder = "models"

# Project-scoped root under the datasets mount: <datasets_dir>/<project name>.
download_base_folder = "{}/{}".format(datasets_dir, project_ds_folder)

## Download, Export, and Register YOLO Models with MLflow Client

This function automates the workflow of exporting Ultralytics YOLO models to ONNX format and registering them as **MLflow Registered Models**.

### Key steps:

1. **Imports**  
   - `onnx`, `mlflow`, and `mlflow.onnx` → For ONNX handling and MLflow logging.  
   - `YOLO` (Ultralytics) → For downloading and exporting models.  
   - `MlflowClient` + `RunsArtifactRepository` → To programmatically register models.  

2. **Function definition:**
   - `download_and_register_yolov_models_with_client(download_path, models_folder, experiment_name)`  
   Creates an experiment, sets up folders, and registers models.

3. **Model requests list:**  
   - Maps YOLO checkpoint filenames (e.g., `yolov8n.pt`) to clean registry names (e.g., `yolov8n`).

4. **Export → Move → Cleanup:**  
   - Each YOLO model is exported to ONNX.  
   - The `.onnx` file is moved into a structured output directory.  
   - Original `.pt` checkpoints are removed to avoid clutter.  

5. **Log to MLflow Run:**  
   - ONNX model is logged under the artifact path `"model"`.  
   - A `runs:/` URI is generated and resolved to a storage URI.  

6. **Register model versions:**  
   - Creates the registered model entry if it doesn’t already exist.  
   - Uses `MlflowClient.create_model_version` to version each export.  
   - Tracks model versions in the `registered_versions` dictionary.  

7. **Output:**  
   - Returns a dict mapping model registry names to their latest registered version numbers.  

This approach ensures that:
- Every YOLO model is consistently versioned in MLflow.  
- The registry contains clean, reusable ONNX exports.  
- Comparison experiments can later pull models by name/version.  


In [None]:
import os
import shutil
import onnx
import mlflow
import mlflow.onnx
from ultralytics import YOLO
from mlflow.tracking import MlflowClient
from mlflow.store.artifact.runs_artifact_repo import RunsArtifactRepository
from mlflow.exceptions import MlflowException

def download_and_register_yolov_models_with_client(
    download_path: str,
    models_folder: str,
    experiment_name: str = "YOLO_Export",
    model_requests=None,
) -> dict:
    """Export YOLO checkpoints to ONNX and register each export in the MLflow registry.

    For every requested model the function loads the checkpoint with
    Ultralytics ``YOLO``, exports it to ONNX, moves the ``.onnx`` file into
    ``<download_path>/<models_folder>``, logs it to a new MLflow run under the
    artifact path ``"model"``, and creates a new model version that points at
    that run's artifact.

    Args:
        download_path: Base directory under which the models folder is created.
        models_folder: Subdirectory of ``download_path`` that receives the
            ``.onnx`` files.
        experiment_name: MLflow experiment the registration runs are logged to.
        model_requests: Optional iterable of ``(checkpoint_name, registry_name)``
            pairs. Defaults to the four YOLO checkpoints this notebook has
            always exported. The checkpoint file is deleted after export, so
            do not pass the only copy of custom weights.

    Returns:
        Dict mapping each registry name to the model version created by this
        call.

    Raises:
        RuntimeError: If an export does not produce an ``.onnx`` file.
        MlflowException: If the registered model can neither be created nor
            found, or if logging / version creation fails.
    """
    mlflow.set_experiment(experiment_name)
    client = MlflowClient()

    output_dir = os.path.join(download_path, models_folder)
    os.makedirs(output_dir, exist_ok=True)

    if model_requests is None:
        # Models to export -> register names (clean, no extension)
        model_requests = [
            ("yolov8n.pt", "yolov8n"),
            ("yolov5n.pt", "yolov5n"),
            ("yolov8m.pt", "yolov8m"),
            ("yolov8s.pt", "yolov8s"),
        ]

    registered_versions = {}

    for requested_name, registry_name in model_requests:
        print(f"[Export] {requested_name} -> ONNX")
        model = YOLO(requested_name)
        onnx_src_path = model.export(format="onnx")  # returns str path to .onnx

        if not onnx_src_path or not os.path.isfile(onnx_src_path):
            raise RuntimeError(f"Export failed for {requested_name}")

        # Work out where the source checkpoint may live *before* moving the export.
        # NOTE(review): Ultralytics may resolve a requested name to a differently
        # named checkpoint (e.g. 'yolov5n.pt' -> 'yolov5nu.pt' - confirm for the
        # installed version); the export then sits next to that checkpoint rather
        # than next to `requested_name`, so both candidates are cleaned up.
        checkpoint_candidates = (
            requested_name,
            os.path.splitext(onnx_src_path)[0] + ".pt",
        )

        onnx_filename = os.path.basename(onnx_src_path)
        final_path = os.path.join(output_dir, onnx_filename)
        try:
            shutil.move(onnx_src_path, final_path)
        except OSError:
            # Best-effort fallback kept from the original: copy, then delete the source.
            shutil.copy2(onnx_src_path, final_path)
            os.remove(onnx_src_path)

        # Remove the downloaded checkpoint(s) so the working directory stays clean.
        for checkpoint_path in checkpoint_candidates:
            if os.path.isfile(checkpoint_path):
                os.remove(checkpoint_path)

        # Log ONNX with model flavor so the registry can version it
        print(f"[Log] {registry_name} to MLflow run")
        onnx_model = onnx.load(final_path)

        with mlflow.start_run(run_name=f"register_{registry_name}") as run:
            mlflow.onnx.log_model(onnx_model=onnx_model, artifact_path="model")

            # Build a runs:/ URI then convert to the underlying store URI
            runs_uri = f"runs:/{run.info.run_id}/model"
            model_src = RunsArtifactRepository.get_underlying_uri(runs_uri)

            # Ensure Registered Model exists
            try:
                client.create_registered_model(registry_name)
            except MlflowException as create_error:
                # Usually this means the model already exists. Confirm that instead
                # of assuming it, so that permission or connectivity problems are
                # reported with their real cause rather than as a confusing failure
                # in create_model_version below.
                try:
                    client.get_registered_model(registry_name)
                except MlflowException:
                    raise create_error

            # Create a Model Version pointing to this run's logged model
            print(f"[Register] {registry_name} -> new version from run {run.info.run_id}")
            mv = client.create_model_version(
                name=registry_name,
                source=model_src,
                run_id=run.info.run_id,
                description=f"Auto-registered {registry_name} ONNX export",
            )
            registered_versions[registry_name] = mv.version

    print("[Done] All models exported and registered.")
    return registered_versions




In [None]:
# Download and register the YOLO models.
# First make sure the registration experiment exists, then export every model
# to ONNX and register it under that same experiment.
utilities.ensure_mlflow_experiment(utilities.model_registration_experiment_name)
download_and_register_yolov_models_with_client(
    download_path=download_base_folder,
    models_folder=models_folder,
    experiment_name=utilities.model_registration_experiment_name,
)


## Download and Prepare COCO 2017 Validation Dataset

This utility function automates downloading, extracting, and preparing the **COCO 2017 validation dataset** (`val2017`) in a format suitable for YOLO training/evaluation.

### Key steps:

1. **Setup**
   - Creates `base_dir` if it doesn’t exist.
   - Defines dataset sources:
     - Images: `http://images.cocodataset.org/zips/val2017.zip`
     - Annotations: `http://images.cocodataset.org/annotations/annotations_trainval2017.zip`

2. **Download**
   - Checks if image and annotation ZIPs already exist in `base_dir`.
   - If not, downloads them with `urllib.request.urlretrieve`.

3. **Extract Images**
   - Unzips `val2017.zip`.
   - Moves extracted folder into `base_dir/images/val2017` for consistency.

4. **Extract Annotations**
   - Unzips `annotations_trainval2017.zip`.
   - Verifies `instances_val2017.json` exists inside `base_dir/annotations`.

5. **Convert COCO → YOLO Labels**
   - Creates a `labels/val2017/` directory.
   - Uses `pycocotools.COCO` to:
     - Load categories and remap COCO’s sparse category IDs into a contiguous `[0..79]` index.
     - Iterate through images, collecting non-crowd bounding box annotations (boxes with non-positive width or height are skipped).
     - Normalize coordinates (`x_center`, `y_center`, `width`, `height`) relative to image dimensions.
     - Save YOLO-formatted `.txt` files (one per image) into `labels/val2017/`.

6. **Output**
   - Prints confirmation once images, annotations, and YOLO label files are fully prepared.

### Directory Layout After Run
```
/mnt/data/<dataset>/coco/
                        images/val2017/ # validation images
                        annotations/ # COCO annotation JSONs
                        labels/val2017/ # YOLO .txt label files
```

In [3]:
import os, urllib.request, zipfile, shutil
from pycocotools.coco import COCO

def _download_if_missing(url, dest_path):
    """Download ``url`` to ``dest_path`` unless ``dest_path`` already exists.

    The data is written to ``dest_path + ".part"`` and renamed only after the
    download finishes, so an interrupted download never leaves a truncated file
    that a later run would mistake for a complete archive.
    """
    if os.path.exists(dest_path):
        return

    print(f"Downloading {os.path.basename(dest_path)}...")
    partial_path = dest_path + ".part"
    try:
        urllib.request.urlretrieve(url, partial_path)
        os.replace(partial_path, dest_path)
    finally:
        # Only present if the download or rename failed; do not leave debris behind.
        if os.path.exists(partial_path):
            os.remove(partial_path)


def _write_yolo_labels(ann_json, out_dir):
    """Write one YOLO-format ``.txt`` label file per image in ``ann_json`` into ``out_dir``."""
    coco = COCO(ann_json)
    # COCO category ids are not 0..79; map to contiguous 0-based.
    # Sorting makes the class index independent of the order in the JSON file.
    cats = coco.loadCats(sorted(coco.getCatIds()))
    catid2cls = {c["id"]: i for i, c in enumerate(cats)}

    for img in coco.loadImgs(coco.getImgIds()):
        w, h = img["width"], img["height"]
        anns = coco.loadAnns(coco.getAnnIds(imgIds=img["id"], iscrowd=False))

        lines = []
        for a in anns:
            # COCO bbox is [x_min, y_min, width, height] in pixels.
            x, y, bw, bh = a["bbox"]
            if bw <= 0 or bh <= 0:
                continue
            # YOLO wants box centre and size, normalised by the image dimensions.
            x_c = (x + bw / 2) / w
            y_c = (y + bh / 2) / h
            bw_n = bw / w
            bh_n = bh / h
            cls = catid2cls[a["category_id"]]
            lines.append(f"{cls} {x_c:.6f} {y_c:.6f} {bw_n:.6f} {bh_n:.6f}")

        # Images without usable boxes still get an (empty) label file.
        label_path = os.path.join(out_dir, os.path.splitext(img["file_name"])[0] + ".txt")
        with open(label_path, "w") as f:
            f.write("\n".join(lines))


def download_and_prepare_coco2017_yolo(base_dir):
    """Download COCO 2017 val images/annotations and build YOLO label files.

    Every stage is skipped when its output already exists, so the function can
    be re-run cheaply. After a successful run ``base_dir`` contains::

        images/val2017/    validation images
        annotations/       COCO annotation JSON files
        labels/val2017/    one YOLO .txt label file per image

    Args:
        base_dir: Directory that receives the archives and the prepared
            dataset; created if missing.

    Raises:
        AssertionError: If ``instances_val2017.json`` is missing after the
            annotations archive has been extracted.
    """
    os.makedirs(base_dir, exist_ok=True)

    img_set = "val2017"
    images_url = f"http://images.cocodataset.org/zips/{img_set}.zip"
    ann_url    = "http://images.cocodataset.org/annotations/annotations_trainval2017.zip"

    images_zip = os.path.join(base_dir, f"{img_set}.zip")
    ann_zip    = os.path.join(base_dir, "annotations_trainval2017.zip")

    # download
    _download_if_missing(images_url, images_zip)
    _download_if_missing(ann_url, ann_zip)

    # extract images
    target_images_dir = os.path.join(base_dir, "images", img_set)
    if not os.path.exists(target_images_dir):
        print(f"Extracting {os.path.basename(images_zip)}...")
        with zipfile.ZipFile(images_zip, "r") as zf:
            zf.extractall(base_dir)  # creates base_dir/val2017
        os.makedirs(os.path.join(base_dir, "images"), exist_ok=True)
        src = os.path.join(base_dir, img_set)  # base_dir/val2017
        if os.path.exists(src):
            # The target only appears after a complete extraction, so an
            # interrupted extraction is simply redone on the next run.
            shutil.move(src, target_images_dir)
        else:
            # archive did not contain the expected top-level folder
            os.makedirs(target_images_dir, exist_ok=True)

    # extract annotations
    ann_dir = os.path.join(base_dir, "annotations")
    ann_json = os.path.join(ann_dir, f"instances_{img_set}.json")
    if not os.path.exists(ann_json):
        print(f"Extracting {os.path.basename(ann_zip)}...")
        with zipfile.ZipFile(ann_zip, "r") as zf:
            zf.extractall(base_dir)  # creates base_dir/annotations/*.json
    assert os.path.exists(ann_json), f"Missing {ann_json}"

    # build labels
    labels_dir = os.path.join(base_dir, "labels", img_set)
    if not os.path.exists(labels_dir):
        print(f"Converting COCO → YOLO labels into {labels_dir} ...")
        # Build in a staging directory and rename at the end: labels_dir doubles
        # as the "already converted" marker, so it must never exist half-filled.
        staging_dir = labels_dir + ".partial"
        if os.path.exists(staging_dir):
            shutil.rmtree(staging_dir)  # leftover from an interrupted run
        os.makedirs(staging_dir)

        _write_yolo_labels(ann_json, staging_dir)
        os.replace(staging_dir, labels_dir)

    print("COCO 2017 val images, annotations, and YOLO labels are ready.")


NOTE! Installing ujson may make loading annotations faster.


In [4]:
# Prepare the COCO val2017 dataset in a "coco" folder under the project dataset root.
base_coco_dir = "{}/coco".format(download_base_folder)
download_and_prepare_coco2017_yolo(base_dir=base_coco_dir)

COCO 2017 val images, annotations, and YOLO labels are ready.
