# Model Registry Sampler

### Setup

In [1]:
import mlflow
import time
# Report the installed MLflow version, then display the tracking URI currently in effect.
print("MLflow Version:", mlflow.__version__)
mlflow.get_tracking_uri()

MLflow Version: 2.7.1



* 'schema_extra' has been renamed to 'json_schema_extra'


'http://localhost:5000'

In [2]:
# --- Notebook configuration ---
data_path = "../data/train/wine-quality-white.csv"
model_name = "sklearn_registry_sampler"
experiment_name = "sklearn_registry_sampler"
naptime = 0  # seconds passed to time.sleep() after each create_model_version call

# Point MLflow at the tracking server, then build a client.
mlflow.set_tracking_uri("http://localhost:5000")
client = mlflow.tracking.MlflowClient()

# Make the experiment active and look up its id by name.
mlflow.set_experiment(experiment_name)
experiment_id = client.get_experiment_by_name(experiment_name).experiment_id

(experiment_id, experiment_name, naptime)

('870166387730181755', 'sklearn_registry_sampler', 0)

### Define training

In [3]:
import pandas as pd
import numpy as np
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeRegressor
import mlflow
import mlflow.sklearn

In [4]:
# Load the dataset and split it into train/test partitions.
# The partitions are named *_df so they cannot be confused with (or clobber) the
# `train()` function defined further down; re-running this cell stays safe.
data = pd.read_csv(data_path)

# Fixed seed: the split, and therefore the logged rmse values, are repeatable.
train_df, test_df = train_test_split(data, random_state=42)

# Features are every column except the "quality" target.
train_x = train_df.drop(columns=["quality"])
test_x = test_df.drop(columns=["quality"])

# Targets are kept as single-column DataFrames.
train_y = train_df[["quality"]]
test_y = test_df[["quality"]]

In [5]:
def train(max_depth):
    """Train a decision-tree regressor inside a new MLflow run and log it.

    Fits a ``DecisionTreeRegressor`` on the notebook-level ``train_x`` /
    ``train_y``, predicts on ``test_x``, and logs to the run: the
    ``max_depth`` parameter, the ``rmse`` metric computed against ``test_y``,
    and the fitted model under the artifact path ``sklearn-model``.
    Also prints the experiment id, run id and rounded rmse.

    Args:
        max_depth: Value forwarded to ``DecisionTreeRegressor(max_depth=...)``.
    """
    with mlflow.start_run(run_name="reg_test") as run:
        # Use `run_id` (as the rest of the notebook does); `run_uuid` is the
        # deprecated spelling of the same identifier.
        run_id = run.info.run_id
        dt = DecisionTreeRegressor(max_depth=max_depth)
        dt.fit(train_x, train_y)
        predictions = dt.predict(test_x)
        mlflow.log_param("max_depth", max_depth)
        rmse = np.sqrt(mean_squared_error(test_y, predictions))
        mlflow.log_metric("rmse", rmse)
        print(f"{experiment_id} {run_id} {round(rmse,3)}")
        mlflow.sklearn.log_model(dt, "sklearn-model")

### Create runs

In [6]:
# One training run per tree depth.
max_depths = [1, 2, 4, 5, 16]
for depth in max_depths:
    train(depth)

870166387730181755 a8bbd45b6295461889c5d4b62f77069d 0.819




870166387730181755 28e4131f09ae4ef1aa8c3461e112ded1 0.786
870166387730181755 ec042fce8a0b403e938437da22b90b14 0.745
870166387730181755 f0855b30fbf84464b302d693e1f96191 0.755
870166387730181755 ec22d452079c4237834da694f8cb6a1c 0.874


### Create model versions

In [7]:
# Runs of the experiment, ordered by ascending rmse (best first).
runs = client.search_runs(
    [experiment_id],
    filter_string="",
    order_by=["metrics.rmse asc"],
)
for candidate in runs:
    print(round(candidate.data.metrics["rmse"], 3), candidate.data.params)

0.745 {'max_depth': '4'}
0.755 {'max_depth': '5'}
0.786 {'max_depth': '2'}
0.819 {'max_depth': '1'}
0.874 {'max_depth': '16'}


In [8]:
# The first entry of `runs` becomes the production candidate.
prod_run = runs[0]
round(prod_run.data.metrics["rmse"], 3)

0.745

In [9]:
# Entries 1..3 are staging candidates; everything after that stays unstaged.
staging_runs, none_runs = runs[1:4], runs[4:]
(len(staging_runs), len(none_runs))

(3, 1)

In [10]:
# Show rmse and params of each staging candidate.
for candidate in staging_runs:
    print(round(candidate.data.metrics["rmse"], 3), candidate.data.params)

0.755 {'max_depth': '5'}
0.786 {'max_depth': '2'}
0.819 {'max_depth': '1'}


In [11]:
# Show rmse and params of each run that will not be staged.
for candidate in none_runs:
    print(round(candidate.data.metrics["rmse"], 3), candidate.data.params)

0.874 {'max_depth': '16'}


## Registry

In [12]:
# Start from a clean registry: drop the registered model if it is there.
# Only MLflow errors (e.g. the model does not exist yet) are reported and
# tolerated; any other exception is a real problem and is allowed to propagate.
try:
    client.delete_registered_model(model_name)
    print("Deleting model")
except mlflow.exceptions.MlflowException as e:
    print(e)

RESOURCE_DOES_NOT_EXIST: Registered Model with name=sklearn_registry_sampler not found


In [13]:
from mlflow.exceptions import MlflowException, RestException

# Get-or-create the registered model.
try:
    registered_model = client.get_registered_model(model_name)
    print("Found existing model")
except MlflowException as e:
    # Only a "not found" error means the model has to be created; any other
    # failure (server error, permissions, ...) is re-raised instead of being
    # mistaken for a missing model. Catching MlflowException (the base class of
    # RestException) also covers tracking stores that are not accessed over REST.
    if e.error_code != "RESOURCE_DOES_NOT_EXIST":
        raise
    print("Creating new model")
    client.create_registered_model(model_name)
    registered_model = client.get_registered_model(model_name)
type(registered_model), registered_model.__dict__

Creating new model


(mlflow.entities.model_registry.registered_model.RegisteredModel,
 {'_name': 'sklearn_registry_sampler',
  '_creation_time': 1697790565482,
  '_last_updated_timestamp': 1697790565482,
  '_description': '',
  '_latest_version': [],
  '_tags': {},
  '_aliases': {}})

### Production model

In [14]:
# Display the artifact URI recorded on the production candidate's run.
prod_run.info.artifact_uri

'mlflow-artifacts:/870166387730181755/ec042fce8a0b403e938437da22b90b14/artifacts'

In [15]:
# Register the model logged by the production candidate as a new model version.
source = "/".join([prod_run.info.artifact_uri, "sklearn-model"])
client.create_model_version(
    name=model_name,
    source=source,
    run_id=prod_run.info.run_id,
)
time.sleep(naptime)

2023/10/20 15:29:25 INFO mlflow.tracking._model_registry.client: Waiting up to 300 seconds for model version to finish creation. Model name: sklearn_registry_sampler, version 1


In [16]:
# Latest versions as returned by the client API.
versions = client.get_latest_versions(name=model_name)
(len(versions), versions)

(1,
 [<ModelVersion: aliases=[], creation_timestamp=1697790565506, current_stage='None', description='', last_updated_timestamp=1697790565506, name='sklearn_registry_sampler', run_id='ec042fce8a0b403e938437da22b90b14', run_link='', source='mlflow-artifacts:/870166387730181755/ec042fce8a0b403e938437da22b90b14/artifacts/sklearn-model', status='READY', status_message='', tags={}, user_id='', version='1'>])

In [17]:
# Same information, read from the RegisteredModel entity instead.
registered_model = client.get_registered_model(name=model_name)
versions = registered_model.latest_versions
(len(versions), versions)

(1,
 [<ModelVersion: aliases=[], creation_timestamp=1697790565506, current_stage='None', description='', last_updated_timestamp=1697790565506, name='sklearn_registry_sampler', run_id='ec042fce8a0b403e938437da22b90b14', run_link='', source='mlflow-artifacts:/870166387730181755/ec042fce8a0b403e938437da22b90b14/artifacts/sklearn-model', status='READY', status_message='', tags={}, user_id='', version='1'>])

In [18]:
# Fetch version 1 and display its attributes.
versionDetails = client.get_model_version(name=model_name, version=1)
vars(versionDetails)

{'_name': 'sklearn_registry_sampler',
 '_version': '1',
 '_creation_time': 1697790565506,
 '_last_updated_timestamp': 1697790565506,
 '_description': '',
 '_user_id': '',
 '_current_stage': 'None',
 '_source': 'mlflow-artifacts:/870166387730181755/ec042fce8a0b403e938437da22b90b14/artifacts/sklearn-model',
 '_run_id': 'ec042fce8a0b403e938437da22b90b14',
 '_run_link': '',
 '_status': 'READY',
 '_status_message': '',
 '_tags': {},
 '_aliases': []}

In [19]:
# Move version 1 to the Production stage.
# Note: transition_model_version_stage is the MLflow 1.9.0 API; under 1.8.0 the
# stage was set through update_model_version(..., stage=...).
client.transition_model_version_stage(name=model_name, version=1, stage="Production")

<ModelVersion: aliases=[], creation_timestamp=1697790565506, current_stage='Production', description='', last_updated_timestamp=1697790565579, name='sklearn_registry_sampler', run_id='ec042fce8a0b403e938437da22b90b14', run_link='', source='mlflow-artifacts:/870166387730181755/ec042fce8a0b403e938437da22b90b14/artifacts/sklearn-model', status='READY', status_message='', tags={}, user_id='', version='1'>

In [20]:
# Display the stage names the registry reports for version 1 of the model.
client.get_model_version_stages(model_name,1)

['None', 'Staging', 'Production', 'Archived']

In [21]:
# Display the latest versions of the model after the stage transition.
client.get_latest_versions(model_name)

[<ModelVersion: aliases=[], creation_timestamp=1697790565506, current_stage='Production', description='', last_updated_timestamp=1697790565579, name='sklearn_registry_sampler', run_id='ec042fce8a0b403e938437da22b90b14', run_link='', source='mlflow-artifacts:/870166387730181755/ec042fce8a0b403e938437da22b90b14/artifacts/sklearn-model', status='READY', status_message='', tags={}, user_id='', version='1'>]

In [22]:
# Re-fetch version 1 and display its attributes after the stage transition.
versionDetails = client.get_model_version(name=model_name, version=1)
vars(versionDetails)

{'_name': 'sklearn_registry_sampler',
 '_version': '1',
 '_creation_time': 1697790565506,
 '_last_updated_timestamp': 1697790565579,
 '_description': '',
 '_user_id': '',
 '_current_stage': 'Production',
 '_source': 'mlflow-artifacts:/870166387730181755/ec042fce8a0b403e938437da22b90b14/artifacts/sklearn-model',
 '_run_id': 'ec042fce8a0b403e938437da22b90b14',
 '_run_link': '',
 '_status': 'READY',
 '_status_message': '',
 '_tags': {},
 '_aliases': []}

### Staging

In [23]:
def show_versions(versions):
    """Print one line per model version: run id, version, stage and quoted description.

    Args:
        versions: Iterable of objects exposing ``run_id``, ``version``,
            ``current_stage`` and ``description`` attributes.
    """
    for mv in versions:
        leading_fields = (mv.run_id, mv.version, mv.current_stage)
        print(*leading_fields, f"'{mv.description}'")

In [24]:
 for j,run in enumerate(staging_runs):
    print(f"==== {j}")
    source = f"{run.info.artifact_uri}/sklearn-model"
    version = client.create_model_version(model_name, source, run.info.run_id)
    #print(version.__dict__)
    time.sleep(naptime)
    versionDetails = client.get_model_version(model_name,version.version)
    print(versionDetails.__dict__)
    #client.update_model_version(model_name, version.version, stage="Staging", description=f"My staging version {j}") # 1.8.0
    client.transition_model_version_stage(model_name, version.version, "Staging")

==== 0


2023/10/20 15:29:25 INFO mlflow.tracking._model_registry.client: Waiting up to 300 seconds for model version to finish creation. Model name: sklearn_registry_sampler, version 2
2023/10/20 15:29:25 INFO mlflow.tracking._model_registry.client: Waiting up to 300 seconds for model version to finish creation. Model name: sklearn_registry_sampler, version 3
2023/10/20 15:29:25 INFO mlflow.tracking._model_registry.client: Waiting up to 300 seconds for model version to finish creation. Model name: sklearn_registry_sampler, version 4


{'_name': 'sklearn_registry_sampler', '_version': '2', '_creation_time': 1697790565680, '_last_updated_timestamp': 1697790565680, '_description': '', '_user_id': '', '_current_stage': 'None', '_source': 'mlflow-artifacts:/870166387730181755/f0855b30fbf84464b302d693e1f96191/artifacts/sklearn-model', '_run_id': 'f0855b30fbf84464b302d693e1f96191', '_run_link': '', '_status': 'READY', '_status_message': '', '_tags': {}, '_aliases': []}
==== 1
{'_name': 'sklearn_registry_sampler', '_version': '3', '_creation_time': 1697790565687, '_last_updated_timestamp': 1697790565687, '_description': '', '_user_id': '', '_current_stage': 'None', '_source': 'mlflow-artifacts:/870166387730181755/28e4131f09ae4ef1aa8c3461e112ded1/artifacts/sklearn-model', '_run_id': '28e4131f09ae4ef1aa8c3461e112ded1', '_run_link': '', '_status': 'READY', '_status_message': '', '_tags': {}, '_aliases': []}
==== 2
{'_name': 'sklearn_registry_sampler', '_version': '4', '_creation_time': 1697790565694, '_last_updated_timestamp':

### Manipulate Versions

#### Update Version

In [25]:
# List every version found by searching the registry for this model name.
show_versions(client.search_model_versions(f"name='{model_name}'"))

a8bbd45b6295461889c5d4b62f77069d 4 Staging ''
28e4131f09ae4ef1aa8c3461e112ded1 3 Staging ''
f0855b30fbf84464b302d693e1f96191 2 Staging ''
ec042fce8a0b403e938437da22b90b14 1 Production ''


In [26]:
# Move version 3 back to the "None" stage.
# Note: transition_model_version_stage is the MLflow 1.9.0 API; under 1.8.0 the
# stage was set through update_model_version(..., stage='None').
client.transition_model_version_stage(name=model_name, version=3, stage="None")

<ModelVersion: aliases=[], creation_timestamp=1697790565687, current_stage='None', description='', last_updated_timestamp=1697790565720, name='sklearn_registry_sampler', run_id='28e4131f09ae4ef1aa8c3461e112ded1', run_link='', source='mlflow-artifacts:/870166387730181755/28e4131f09ae4ef1aa8c3461e112ded1/artifacts/sklearn-model', status='READY', status_message='', tags={}, user_id='', version='3'>

In [27]:
# List the versions again to see the effect of the stage change.
show_versions(client.search_model_versions(f"name='{model_name}'"))

28e4131f09ae4ef1aa8c3461e112ded1 3 None ''
a8bbd45b6295461889c5d4b62f77069d 4 Staging ''
f0855b30fbf84464b302d693e1f96191 2 Staging ''
ec042fce8a0b403e938437da22b90b14 1 Production ''


#### Delete Version

In [28]:
# Delete version 3 of the registered model.
client.delete_model_version(model_name, 3)

In [29]:
# List the versions again to confirm the deleted version is gone.
show_versions(client.search_model_versions(f"name='{model_name}'"))

a8bbd45b6295461889c5d4b62f77069d 4 Staging ''
f0855b30fbf84464b302d693e1f96191 2 Staging ''
ec042fce8a0b403e938437da22b90b14 1 Production ''


### Execute version methods

In [30]:
# Variant 1: search_model_versions with a name filter.
versions = client.search_model_versions(filter_string=f"name='{model_name}'")
show_versions(versions)

a8bbd45b6295461889c5d4b62f77069d 4 Staging ''
f0855b30fbf84464b302d693e1f96191 2 Staging ''
ec042fce8a0b403e938437da22b90b14 1 Production ''


In [31]:
# Variant 2: get_latest_versions without a stage filter.
versions = client.get_latest_versions(name=model_name)
show_versions(versions)

a8bbd45b6295461889c5d4b62f77069d 4 Staging ''
ec042fce8a0b403e938437da22b90b14 1 Production ''


In [32]:
# Variant 3: fetch the RegisteredModel entity and display its attributes.
registered_model = client.get_registered_model(name=model_name)
vars(registered_model)

{'_name': 'sklearn_registry_sampler',
 '_creation_time': 1697790565482,
 '_last_updated_timestamp': 1697790565758,
 '_description': '',
 '_latest_version': [<ModelVersion: aliases=[], creation_timestamp=1697790565694, current_stage='Staging', description='', last_updated_timestamp=1697790565699, name='sklearn_registry_sampler', run_id='a8bbd45b6295461889c5d4b62f77069d', run_link='', source='mlflow-artifacts:/870166387730181755/a8bbd45b6295461889c5d4b62f77069d/artifacts/sklearn-model', status='READY', status_message='', tags={}, user_id='', version='4'>,
  <ModelVersion: aliases=[], creation_timestamp=1697790565506, current_stage='Production', description='', last_updated_timestamp=1697790565579, name='sklearn_registry_sampler', run_id='ec042fce8a0b403e938437da22b90b14', run_link='', source='mlflow-artifacts:/870166387730181755/ec042fce8a0b403e938437da22b90b14/artifacts/sklearn-model', status='READY', status_message='', tags={}, user_id='', version='1'>],
 '_tags': {},
 '_aliases': {}}

In [33]:
# Print the versions held in the registered model's `latest_versions` attribute.
show_versions(registered_model.latest_versions)

a8bbd45b6295461889c5d4b62f77069d 4 Staging ''
ec042fce8a0b403e938437da22b90b14 1 Production ''


In [34]:
# Latest version restricted to the Production stage.
versions = client.get_latest_versions(name=model_name, stages=["Production"])
show_versions(versions)

ec042fce8a0b403e938437da22b90b14 1 Production ''


In [35]:
# Latest version restricted to the Staging stage.
versions = client.get_latest_versions(name=model_name, stages=["Staging"])
show_versions(versions)

a8bbd45b6295461889c5d4b62f77069d 4 Staging ''


### Get Model and predict

#### Production model

In [36]:
# Prediction input: the full dataset without the "quality" target column.
data_predict = data.drop(columns=["quality"])

In [37]:
# Registry URI addressing the model by name and stage.
model_uri = "models:/{}/production".format(model_name)
model_uri

'models:/sklearn_registry_sampler/production'

In [38]:
# Load the model addressed by `model_uri` as a scikit-learn object.
model = mlflow.sklearn.load_model(model_uri=model_uri)
model

Downloading artifacts:   0%|          | 0/5 [00:00<?, ?it/s]

In [39]:
# Predict on the prepared input and preview the first five rows.
predictions = model.predict(data_predict)
pd.DataFrame(predictions).head()

Unnamed: 0,0
0,5.434518
1,5.055118
2,5.434518
3,5.884848
4,5.884848


#### Staging model

NOTE: If more than one model version is in the Staging stage, MLflow returns the most recent one.
This behavior is not documented.

In [40]:
# Load the model addressed by name and the "staging" stage.
staging_uri = f"models:/{model_name}/staging"
model = mlflow.sklearn.load_model(staging_uri)
model

Downloading artifacts:   0%|          | 0/5 [00:00<?, ?it/s]

In [41]:
# Predict with the currently loaded model and preview the first five rows.
predictions = model.predict(data_predict)
pd.DataFrame(predictions).head()

Unnamed: 0,0
0,5.570489
1,5.570489
2,5.570489
3,5.570489
4,5.570489
