In [None]:
from mlflow import MlflowClient

client = MlflowClient()

# Assign alias to a model version
client.set_registered_model_alias(
    name="RandomForestRegressor",
    alias="production",  # Your custom alias
    version=3  # Version number
)

In [12]:
import mlflow
from mlflow.tracking import MlflowClient

MLFLOW_TRACKING_URI = "sqlite:///mlflow.db"

In [13]:
client = MlflowClient(tracking_uri=MLFLOW_TRACKING_URI)
mlflow.set_tracking_uri("sqlite:///mlflow.db")
current_uri = mlflow.get_tracking_uri()
print("Current Tracking URI:", current_uri)
client.search_experiments()

[<Experiment: artifact_location='/workspaces/LearnMLOPS/03-traning/models/mlruns/6', creation_time=1749674191155, experiment_id='6', last_update_time=1749674191155, lifecycle_stage='active', name='my-cool-experiment', tags={}>,
 <Experiment: artifact_location='/workspaces/LearnMLOPS/03-traning/models/mlruns/5', creation_time=1749666481570, experiment_id='5', last_update_time=1749666481570, lifecycle_stage='active', name='new-experiment', tags={}>,
 <Experiment: artifact_location='/workspaces/LearnMLOPS/03-traning/models/mlruns/3', creation_time=1749581168605, experiment_id='3', last_update_time=1749581168605, lifecycle_stage='active', name='my-experiment', tags={}>,
 <Experiment: artifact_location='/workspaces/LearnMLOPS/03-traning/models/mlruns/2', creation_time=1749573231525, experiment_id='2', last_update_time=1749573231525, lifecycle_stage='active', name='nyv-taxi-experiment', tags={}>,
 <Experiment: artifact_location='/workspaces/LearnMLOPS/03-traning/models/mlruns/1', creation_ti

In [3]:
from mlflow.entities import ViewType

runs = client.search_runs(
    experiment_ids='5',
    filter_string="metrics.rmse < 7",
    run_view_type=ViewType.ACTIVE_ONLY,
    max_results=5,
    order_by=["metrics.rmse ASC"]
)

In [4]:
for run in runs:
    print(f"run id: {run.info.run_id}, rmse: {run.data.metrics['rmse']:.4f}")

run id: 8d6a9cf2353c49b6a507c4707c678cdd, rmse: 6.4399
run id: f5450207657b49a49bbfc1e7fb0ecf4e, rmse: 6.5890
run id: 15d871eeab8c44049caffaaba807dee0, rmse: 6.9190


In [18]:
from mlflow.entities import ViewType

# Pass experiment_ids as a LIST
runs = client.search_runs(
    experiment_ids=['3'],  # Note: Now a list with '1' as element
    filter_string="metrics.rmse < 7",
    run_view_type=ViewType.ACTIVE_ONLY,
    max_results=5,
    order_by=["metrics.rmse ASC"]
)

if not runs:
    print("No runs found matching criteria!")
else:
    for run in runs:
        print(f"run id: {run.info.run_id}, rmse: {run.data.metrics['rmse']:.4f}")

run id: 0b5765ea007e4f5280dcc0d3c3f291b3, rmse: 6.1753
run id: eb3a07512d7e4198a6e4015d8b26d58d, rmse: 6.1821
run id: 8e1b1e600b754f36b400aac6289c5c77, rmse: 6.1885
run id: 98512e418f704e768cca972070ee70a7, rmse: 6.1921
run id: 06e24b4625c34f75b272a4abab7e57ed, rmse: 6.2046


In [17]:
# Get Registered Model
client = MlflowClient()
# Get the latest version
model_name = "nyc-taxi-regressor"
client.get_registered_model(model_name)
latest_version = client.get_latest_versions(model_name, stages=["None"])[0].version

print(f"Latest version: {latest_version}")

Latest version: 2


  latest_version = client.get_latest_versions(model_name, stages=["None"])[0].version


In [39]:
client.set_registered_model_alias(
    name=model_name,
    alias="champion",  # Like "Production" but more flexible
    version=2
)

# Get the version with the alias
champion_version = client.get_model_version_by_alias(model_name, "champion")
print(model.version) 
# 3. Extract metadata
print(f"Version: {champion_version.version}")
print(f"Run ID: {champion_version.run_id}")
print(f"Current Stage: {champion_version.current_stage}")

# 4. Download artifacts if needed
client.download_artifacts(
    run_id=champion_version.run_id,
    path='preprocessor',
    dst_path="."
)

2
Version: 2
Run ID: 25ca36fedb684e6d91c75997b9ffea24
Current Stage: None


Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

'/workspaces/LearnMLOPS/03-traning/models/preprocessor'

## Comparing versions and selecting model

In [49]:
from sklearn.metrics import mean_squared_error
import pandas as pd


def read_dataframe(filename):
    df = pd.read_parquet(filename)

    df.lpep_dropoff_datetime = pd.to_datetime(df.lpep_dropoff_datetime)
    df.lpep_pickup_datetime = pd.to_datetime(df.lpep_pickup_datetime)

    df['duration'] = df.lpep_dropoff_datetime - df.lpep_pickup_datetime
    df.duration = df.duration.apply(lambda td: td.total_seconds() / 60)

    df = df[(df.duration >= 1) & (df.duration <= 60)]

    categorical = ['PULocationID', 'DOLocationID']
    df[categorical] = df[categorical].astype(str)
    
    return df


def preprocess(df, dv):
    df['PU_DO'] = df['PULocationID'] + '_' + df['DOLocationID']
    categorical = ['PU_DO']
    numerical = ['trip_distance']
    train_dicts = df[categorical + numerical].to_dict(orient='records')
    return dv.transform(train_dicts)


# def test_model(name, stage, X_test, y_test):
#     model = mlflow.pyfunc.load_model(f"models:/{name}/{stage}")
#     y_pred = model.predict(X_test)
#     return {"rmse": mean_squared_error(y_test, y_pred, squared=False)}
def test_model(name: str, alias: str, X_test: pd.DataFrame, y_test: pd.Series):
    """Test a model by its alias (replaces stage)"""
    model_uri = f"models:/{name}@{alias}"  # Note @ symbol for aliases
    model = mlflow.pyfunc.load_model(model_uri)
    
    preds = model.predict(X_test)
    return {"rmse": mean_squared_error(y_test, preds, squared=False)}


In [30]:
import os
os.getcwd()
root_file = "/workspaces/LearnMLOPS/03-traning/models/green_tripdata_2021-03.parquet"

Current directory: /workspaces/LearnMLOPS/03-traning/models


In [35]:
df = read_dataframe('/workspaces/LearnMLOPS/03-traning/models/green_tripdata_2021-03.parquet')

In [None]:
model_versions = client.get_latest_versions(model_name)

for version in model_versions:
    print(f"Version {version.version} - Run ID: {version.run_id}")
    run_id = version.run_id

In [40]:
# Dowload the model
import pickle

with open("preprocessor/preprocessor.b", "rb") as f_in:
    dv = pickle.load(f_in)

In [41]:
X_test = preprocess(df, dv)
target = "duration"
y_test = df[target].values

In [52]:
client.set_registered_model_alias(model_name, "challenger", version=1)

In [53]:
%time test_model(name=model_name, alias="challenger", X_test=X_test, y_test=y_test)

CPU times: user 1.9 s, sys: 49.6 ms, total: 1.95 s
Wall time: 1.28 s


{'rmse': 6.806257685294212}

In [54]:
%time test_model(name=model_name, alias="champion", X_test=X_test, y_test=y_test)


CPU times: user 48.2 ms, sys: 3.87 ms, total: 52.1 ms
Wall time: 57 ms


{'rmse': 930.2196670431273}