In [18]:
# Generic imports (can be reused in other test cells)
import os
import json
from pathlib import Path
import shutil
from unittest.mock import patch

# YOLO
from ultralytics import YOLO

# Evaluation utilities
from mlops.evaluate.metrics import compute_metrics
from model_service.core.yolov8_loader import YOLOv8Loader

from mlops.evaluate.validate_model import validate_model
from mlops.model_registry.registry_utils import list_registry, register_model
import mlops.evaluate.validate_model as validate_module


In [3]:
# Base directory: the kernel's current working directory, made absolute.
# NOTE(review): every path below is derived from it, so the notebook presumably
# has to be started from the repository root — confirm.
BASE_DIR = Path().resolve()

# Model directories (one per lifecycle stage)
INCOMING_MODELS_DIR = BASE_DIR / "models/incoming"
BLESSED_MODELS_DIR = BASE_DIR / "models/blessed"
REJECTED_MODELS_DIR = BASE_DIR / "models/rejected"
ARCHIVE_MODELS_DIR = BASE_DIR / "models/archive"

# Test dataset for evaluation
TEST_DATA_DIR = BASE_DIR / "federated_training/datasets/test"

# Example model under incoming folder
MODEL_PATH = INCOMING_MODELS_DIR / "yolov8n.pt"

# Evaluation report output.
# parents=True: without it mkdir() raises FileNotFoundError whenever
# mlops/evaluate/ does not exist yet (e.g. a fresh working directory).
REPORTS_DIR = BASE_DIR / "mlops/evaluate/reports"
REPORTS_DIR.mkdir(parents=True, exist_ok=True)

# Dataset description file handed to the evaluation helpers
TEST_DATA_YAML = BASE_DIR / "data/Pothole.v1i.yolov8/data.yaml"


# Same location as BASE_DIR / "models/incoming/test_model", expressed through
# the constant defined above so the two cannot drift apart.
TEST_INCOMING = INCOMING_MODELS_DIR / "test_model"


In [44]:

# Build the YOLOv8 model object from the incoming weights file up front;
# compute_metrics() is later handed this object.
model = YOLO(os.fspath(MODEL_PATH))


# Testing `mlops/evaluate/metrics.py`
This section tests `compute_metrics()` for object detection with YOLOv8.
Generic setup and imports live in the cells above; only the metrics-specific code is kept here.


In [None]:

# Evaluate the loaded YOLO model against the dataset described by
# TEST_DATA_YAML and print whatever compute_metrics() returns.
metrics = compute_metrics(model, TEST_DATA_YAML)
print("Test metrics:", metrics)


Ultralytics 8.3.207 üöÄ Python-3.9.23 torch-2.7.0+cu128 CUDA:0 (NVIDIA RTX A2000 8GB Laptop GPU, 7840MiB)
YOLOv8n summary (fused): 72 layers, 3,151,904 parameters, 0 gradients, 8.7 GFLOPs
[34m[1mval: [0mFast image access ‚úÖ (ping: 0.0¬±0.0 ms, read: 2499.7¬±617.9 MB/s, size: 99.9 KB)
[K[34m[1mval: [0mScanning /home/dell/Assign/MLOPs/federated-health-risk-mlops/data/Pothole.v1i.yolov8/test/labels.cache... 182 images, 54 backgrounds, 0 corrupt: 100% ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ 182/182 306.9Kit/s 0.0s
[K                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100% ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ 12/12 3.1it/s 3.8s0.2s
                   all        182        429          0          0          0          0
                person          5         15          0          0          0          0
               bicycle         34         74          0          0          0          0
                   car         21         33         

# Testing `mlops/evaluate/validate_model.py`

This cell tests `validate_model()` in dry-run mode (no model movement). It will compute metrics and print them along with the saved report path.


In [None]:

# Example model for testing (file name only, not a full path)
TEST_MODEL_NAME = "yolov8n.pt"  

# -----------------------------
# Dry-run validation
# -----------------------------
# move_model=False requests the dry-run mode: validate without moving the
# model file. Note that this rebinds the notebook-level name `metrics`.
metrics = validate_model(TEST_MODEL_NAME, move_model=False)

print("\nValidation metrics (dry-run):")
print(json.dumps(metrics, indent=2))

# The report path is only printed when validate_model() included it in its result.
if "_report_path" in metrics:
    print(f"\nMetrics report would be saved at: {metrics['_report_path']}")


Ultralytics 8.3.207 üöÄ Python-3.9.23 torch-2.7.0+cu128 CUDA:0 (NVIDIA RTX A2000 8GB Laptop GPU, 7840MiB)
YOLOv8n summary (fused): 72 layers, 3,151,904 parameters, 0 gradients, 8.7 GFLOPs
[34m[1mval: [0mFast image access ‚úÖ (ping: 0.0¬±0.0 ms, read: 2825.9¬±497.7 MB/s, size: 95.7 KB)
[K[34m[1mval: [0mScanning /home/dell/Assign/MLOPs/federated-health-risk-mlops/data/Pothole.v1i.yolov8/valid/labels.cache... 393 images, 125 backgrounds, 0 corrupt: 100% ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ 393/393 650.8Kit/s 0.0s
[K                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100% ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ 25/25 8.2it/s 3.1s<0.1s
                   all        393        904          0          0          0          0
                person          7         20          0          0          0          0
               bicycle         60         97          0          0          0          0
                   car         60        103      

# Testing `mlops/model_registry/registry_utils.py`
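This section tests `register_model()` and `list_registry()` using a dummy model file.
It runs `validate_model()` in dry-run mode with `compute_metrics` patched, registers the dummy model with the resulting metrics, and prints the returned metadata and the registry entries.
Note that only the validation is a dry run: the registry listing in the saved output includes entries from earlier runs, so each execution appears to add a persistent entry.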


In [23]:

# -------------------------
# Test directories
# -------------------------
# Paths are relative to the current working directory. This rebinds
# TEST_INCOMING, which the configuration cell also assigns (to a different path).
TEST_INCOMING = Path("models/incoming")
TEST_BLESSED = Path("models/blessed")
TEST_REJECTED = Path("models/rejected")
TEST_ARCHIVE = Path("models/archive")
for test_dir in (TEST_INCOMING, TEST_BLESSED, TEST_REJECTED, TEST_ARCHIVE):
    test_dir.mkdir(parents=True, exist_ok=True)

# -------------------------
# Dummy model file
# -------------------------
# Placeholder bytes only; this is not a loadable checkpoint.
model_file = TEST_INCOMING / "weights.pt"
model_file.write_bytes(b"FAKE_MODEL_CONTENT")
print(f"Test model created at {model_file}")



Test model created at models/incoming/weights.pt


In [24]:

# -------------------------
# Stand-in for compute_metrics, used to patch mlops.evaluate.validate_model
# -------------------------

def fake_compute_metrics(model_or_path, data_yaml, save_report=True, output_dir=None, report_name=None):
    """Return fixed detection metrics without evaluating anything.

    All arguments are accepted and ignored. A new dict is built on every
    call, so a caller that mutates the result does not affect later calls.
    """
    canned_metrics = dict(mAP50=0.7, mAP50_95=0.5)
    return canned_metrics

# compute_metrics is swapped out on the validate_model module only for the
# duration of this block; the original attribute is restored on exit.
with patch.object(validate_module, "compute_metrics", side_effect=fake_compute_metrics):
    # Step 1: Validate model (dry-run, no moving)
    metrics_from_validation = validate_model(model_file.name, move_model=False)
    print("\nDry-run validation metrics:")
    print(json.dumps(metrics_from_validation, indent=4))

    # Step 2: Register model with metrics
    metadata = register_model(model_file, metrics=metrics_from_validation)
    print("\nDry-run registration metadata:")
    print(json.dumps(metadata, indent=4))

    # Step 3: List registered models
    registered_models = list_registry()
    print("\nCurrently registered models:")
    for m in registered_models:
        print(f"- {m['version']} : {m['path']}")

# -------------------------
# Clean-up (intentionally disabled)
# -------------------------
# WARNING: TEST_INCOMING / TEST_BLESSED / TEST_REJECTED / TEST_ARCHIVE are
# defined as models/incoming, models/blessed, models/rejected and
# models/archive, so the rmtree below would delete those whole directories,
# not just the dummy weights file. Keep it commented out unless the TEST_*
# paths are pointed at a throwaway location first.
# for d in [TEST_INCOMING, TEST_BLESSED, TEST_REJECTED, TEST_ARCHIVE]:
#     if d.exists():
#         shutil.rmtree(d)

# Nothing is removed above, so the message must not claim a clean-up happened.
print("\n✅ Notebook test completed successfully (test artifacts were left in place).")



Dry-run validation metrics:
{
    "mAP50": 0.7,
    "mAP50_95": 0.5,
    "_passed": true,
    "_report_path": "mlops/evaluate/reports/weights.json"
}

Dry-run registration metadata:
{
    "version": "weights_v20251123_040722",
    "file_name": "weights_v20251123_040722.pt",
    "path": "models/archive/weights_v20251123_040722.pt",
    "registered_at": "20251123_040722",
    "metrics": {
        "mAP50": 0.7,
        "mAP50_95": 0.5,
        "_passed": true,
        "_report_path": "mlops/evaluate/reports/weights.json"
    },
    "hash_md5": "654fad199ca13466999aade12126e13c",
    "status": "archived"
}

Currently registered models:
- weights_v20251123_035729 : models/archive/weights_v20251123_035729.pt
- weights_v20251123_040104 : models/archive/weights_v20251123_040104.pt
- weights_v20251123_040722 : models/archive/weights_v20251123_040722.pt

‚úÖ Test environment cleaned up. Notebook test completed successfully.
