# MLflow Experiment Best Run Model Registration

This `domino_model_registry_example_workbook_2.ipynb` Jupyter notebook demonstrates how to find the best model run within an experiment, based on a specified performance metric, and register that model using MLflow.

In this tutorial, you will:
* Explore all runs within a specific MLflow experiment.
* Find the best-performing run based on a user-defined metric (default: `overall_accuracy`) and sort order.
* Register the best model in the MLflow Model Registry for later use and tracking.

## Steps Covered:
1. Set up and configure MLflow to retrieve experiment data.
2. Use the `MlflowClient` to search for runs based on a specific performance metric.
3. Automatically register the model from the best run in the MLflow Model Registry.

## Requirements:
* A pre-existing MLflow experiment with multiple runs.
* Logged metrics and models from these runs (e.g., `overall_accuracy`, `f1-score`, etc.).
* Access to the MLflow Tracking server and Model Registry.



In [None]:
import os
import time
import mlflow
from mlflow.tracking import MlflowClient
from mlflow.exceptions import MlflowException

def find_best_run_and_register_model(
    experiment_name: str,
    model_name: str,
    sorting_metric: str = 'overall_accuracy',
    sort_order: str = 'DESC',
    model_description: str = None,
    version_tags: dict = None,
    model_tags: dict = None,
    artifact_path: str = 'model',
):
    """
    Find the top-ranked run of an experiment and register its model.

    The experiment is looked up by name, its runs are ordered by
    ``sorting_metric`` in ``sort_order``, and the model artifact of the first
    run is registered under ``model_name``. The registered model is created if
    it does not exist yet. If any existing version of ``model_name`` was
    already created from the selected run, no new version is registered and
    the description/tags are applied to that existing version instead.

    Progress and errors are reported with ``print``; the function returns
    ``None`` in every case. ``MlflowException`` raised while registering or
    tagging is caught and printed rather than propagated.

    :param str experiment_name: The name of the experiment to search for runs.
    :param str model_name: The name under which to register the model.
    :param str sorting_metric: The metric to sort the runs by. Defaults to 'overall_accuracy'.
    :param str sort_order: 'DESC' to pick the highest metric value or 'ASC' to pick the lowest
        (case-insensitive). Any other value falls back to 'DESC'.
    :param str model_description: Optional description set on the registered model version.
    :param dict version_tags: Optional tags to set on the registered model version.
    :param dict model_tags: Optional model-wide tags to set on the registered model.
    :param str artifact_path: Run-relative artifact path the model was logged under.
        Defaults to 'model'.
    """

    client = MlflowClient()

    # Resolve the experiment name to its ID; bail out early if it is unknown.
    experiment = client.get_experiment_by_name(experiment_name)
    if experiment is None:
        print(f"Experiment with name '{experiment_name}' not found.")
        return

    experiment_id = experiment.experiment_id
    print(f"Experiment ID: {experiment_id} for experiment name: {experiment_name}")

    # Normalise the sort direction and fall back to 'DESC' on anything unexpected.
    sort_order = sort_order.upper()
    if sort_order not in ('ASC', 'DESC'):
        print(f"Invalid sort_order '{sort_order}'. Defaulting to 'DESC'.")
        sort_order = 'DESC'

    # Only the top-ranked run is needed, so ask the server for a single result.
    # The metric name is backtick-quoted so names containing characters such as
    # '-' or spaces (e.g. 'f1-score') are accepted by the order_by parser.
    runs = client.search_runs(
        [experiment_id],
        order_by=[f"metrics.`{sorting_metric}` {sort_order}"],
        max_results=1,
    )

    if not runs:
        print(f"No runs found for experiment '{experiment_name}'.")
        return

    best_run = runs[0]
    best_run_id = best_run.info.run_id
    best_metric_value = best_run.data.metrics.get(sorting_metric)

    # A top-ranked run without the metric means the ranking was not meaningful
    # (presumably no run logged it -- MLflow is expected to sort missing values
    # last; TODO confirm). Registering it would publish an arbitrary model.
    if best_metric_value is None:
        print(f"Top-ranked run '{best_run_id}' has no value for metric '{sorting_metric}'. No model registered.")
        return

    print(f"Best run ID: {best_run_id} with {sorting_metric}: {best_metric_value}")

    model_uri = f"runs:/{best_run_id}/{artifact_path}"
    version_registered = None

    try:
        # Make sure the registered model exists, creating it on first use.
        try:
            client.get_registered_model(model_name)
            print(f"Model '{model_name}' found in the registry.")
        except MlflowException as e:
            not_found = (
                getattr(e, "error_code", None) == "RESOURCE_DOES_NOT_EXIST"
                or "RESOURCE_DOES_NOT_EXIST" in str(e)
            )
            if not not_found:
                # Anything other than "not found" (auth, connectivity, ...) is a
                # real failure; let the outer handler report it.
                raise
            print(f"Model '{model_name}' not found. Creating a new registered model.")
            client.create_registered_model(model_name)

        # Look through ALL versions of the model for one created from this run.
        # (get_latest_versions only returns the newest version per stage, so an
        # older version from the same run would be missed and duplicated.)
        versions_from_run = [
            mv for mv in client.search_model_versions(f"name='{model_name}'")
            if mv.run_id == best_run_id
        ]
        if versions_from_run:
            newest = max(versions_from_run, key=lambda mv: int(mv.version))
            version_registered = newest.version
            print(f"Model from run '{best_run_id}' has already been registered as version {version_registered}.")

        # If the model has not been registered for this run, register it now.
        if version_registered is None:
            registered_model = mlflow.register_model(model_uri, model_name)
            version_registered = registered_model.version
            print(f"Model registered under name: {model_name}, version: {version_registered}")

        # Set or update the description for the registered model version.
        if model_description:
            print(f"Setting description for model version {version_registered}: {model_description}")
            client.update_model_version(
                name=model_name,
                version=version_registered,
                description=model_description,
            )

        # Set or update version-specific tags for the registered model version.
        if version_tags:
            print(f"Adding tags for model version {version_registered}")
            for key, value in version_tags.items():
                print(f"Setting version tag {key}: {value}")
                client.set_model_version_tag(
                    name=model_name,
                    version=version_registered,
                    key=key,
                    value=value,
                )

        # Set or update model-wide tags for the registered model.
        if model_tags:
            print(f"Adding model-wide tags for model '{model_name}'")
            for key, value in model_tags.items():
                print(f"Setting model tag {key}: {value}")
                client.set_registered_model_tag(
                    name=model_name,
                    key=key,
                    value=value,
                )

        print(f"Model version {version_registered} registered with description, version-specific tags, and model-wide tags (if provided).")

    except MlflowException as e:
        print(f"Error registering the model: {str(e)}")




In [None]:
# Values for registering model.
# Both lookups raise KeyError if the variable is not set
# (presumably they are provided by the Domino workspace -- verify when running elsewhere).
username = os.environ['DOMINO_STARTING_USERNAME']
project_name = os.environ['DOMINO_PROJECT_NAME']

# model_name = f"your_model_name_here-{username}-{project_name}" # Advanced naming 
model_name = 'Reg_model_cm'  # Replace with the name you'd like for the registered model
experiment_name = 'random-forest-gen-chris-admin'  # Replace with your actual experiment name
sorting_metric = 'training_mean_squared_error'  # Change to the metric you'd like to sort by (default: 'overall_accuracy')
sort_order = 'ASC'  # Set 'ASC' for lowest or 'DESC' for highest (default is 'DESC')
model_description = 'This model was trained using the best run from experiment based on training_mean_squared_error.'  # Add your model description here

# Model-wide tags (apply to the entire model across all versions)
model_tags = {
    'team': 'Domino',
    'project': project_name,
    'business_unit': 'Enablement',
}

# Version-specific tags (apply only to this version of the model)
# NOTE(review): 'model_type' says 'SVM' while the experiment name suggests a
# random forest -- confirm the tag matches the model actually trained.
version_tags = {
    'model_type': 'SVM',
    'dataset': 'Diabetes test',
    'author': username,
}

# Call the function; version_tags and model_tags are optional.
# Keyword arguments keep each value bound to the intended parameter.
find_best_run_and_register_model(
    experiment_name=experiment_name,
    model_name=model_name,
    sorting_metric=sorting_metric,
    sort_order=sort_order,
    model_description=model_description,
    version_tags=version_tags,
    model_tags=model_tags,
)
