MLflow Model Registry

In [None]:
import os

from mlflow.tracking import MlflowClient
# Tracking/registry backend used by the whole notebook. The MLFLOW_TRACKING_URI
# environment variable takes precedence, so the notebook can run on another
# machine without editing this cell; the original local SQLite database is kept
# as the fallback when the variable is unset or empty.
MLFLOW_TRACKING_URI = (
    os.environ.get("MLFLOW_TRACKING_URI")
    or "sqlite:///C:/Users/LENOVO/Documents/mlops-zoomcamp/mlflow.db"
)

Interacting with the MLflow tracking server


The MlflowClient object allows us to interact with:

- an MLflow Tracking Server that creates and manages experiments and runs.

- an MLflow Registry Server that creates and manages registered models and model versions.


In [None]:
# Client bound to the tracking URI configured above; the later cells reuse this
# object for their tracking and registry calls.
client = MlflowClient(tracking_uri=MLFLOW_TRACKING_URI)

# List the experiments known to the tracking server (the returned list is
# displayed as the cell output).
client.search_experiments()


In [None]:
# Create the experiment only when it is missing: create_experiment raises if
# the name is already taken, which would break a re-run of this cell.
experiment_name = "experiment-from-notebook"
existing_experiment = client.get_experiment_by_name(experiment_name)
if existing_experiment is None:
    experiment_id = client.create_experiment(name=experiment_name)
else:
    experiment_id = existing_experiment.experiment_id
# Display the experiment id, as the bare create_experiment call did.
experiment_id

In [None]:
from mlflow.entities import ViewType

# Fetch the active runs of experiment "1", lowest RMSE first.
# To narrow the result, pass e.g. filter_string="metrics.rmse < 7" and/or
# max_results=5.
runs = client.search_runs(
    experiment_ids=["1"],  # the API documents a list of experiment ids
    filter_string="",  # empty filter: no run is excluded
    run_view_type=ViewType.ACTIVE_ONLY,
    order_by=["metrics.rmse ASC"],
)


In [None]:
# Print the id of every run returned by the search above.
for run in runs:
    # Alternative that also shows the metric (presumably disabled because not
    # every run logs "rmse" — TODO confirm):
    # print(f"run id: {run.info.run_id}, rmse metric: {run.data.metrics['rmse']:.4f}")
    print(f"run id: {run.info.run_id}")

# Interacting with the Model Registry


In this section we will use the MlflowClient instance to:

1. Register a new version of the model nyc-taxi-regressor.

2. Retrieve the latest versions of the model nyc-taxi-regressor and check that a new version 4 was created.

3. Transition version 4 to "Staging" and add annotations to it.


In [None]:
import mlflow
# Point the module-level mlflow API (used below for register_model and
# pyfunc.load_model) at the same backend URI that `client` was created with.
mlflow.set_tracking_uri(MLFLOW_TRACKING_URI)


In [None]:
# Register the model logged by one specific run under the registered model
# name "nyc-taxi-regressor".
# NOTE(review): run_id is hard-coded, presumably to a run in the author's local
# tracking database — replace it with one of the run ids printed above when
# running against another backend.
run_id = "bf12dedc26054c8b83ff2e3159264b30"
# NOTE(review): this registers the artifact path "model", while later cells
# load "models_mlflow" from the same run — confirm which artifact is intended.
model_uri = f"runs:/{run_id}/model"
mlflow.register_model(model_uri=model_uri, name="nyc-taxi-regressor")


In [None]:
# Deprecated API, kept for reference: get_latest_versions is marked deprecated
# by the author. The next cell lists every version with search_model_versions
# instead.
model_name = "nyc-taxi-regressor"
latest_versions = client.get_latest_versions(name=model_name)

# Print the version number and current stage of each returned version.
for version in latest_versions:
    print(f"version: {version.version}, stage: {version.current_stage}")


In [None]:
model_name = "nyc-taxi-regressor"
# Retrieve all versions of the model; the filter string matches on the
# registered model name.
all_versions = client.search_model_versions(f"name='{model_name}'")

# Print out the version number and current stage of each one
for version in all_versions:
    print(f"version: {version.version}, stage: {version.current_stage}")


In [None]:
# Move one registered version of `model_name` into the "Staging" stage.
# NOTE(review): the markdown above talks about version 4, but version 3 is
# transitioned here — confirm which one is intended.
model_version = 3
new_stage = "Staging"
client.transition_model_version_stage(
    name=model_name,
    version=model_version,
    stage=new_stage,
    archive_existing_versions=False  # do not archive versions already in this stage
)


In [None]:
from datetime import datetime

# Annotate the version with the stage it was moved to and today's date. This
# reads the model_name, model_version and new_stage globals set by earlier cells.
date = datetime.today().date()
client.update_model_version(
    name=model_name,
    version=model_version,
    description=f"The model version {model_version} was transitioned to {new_stage} on {date}"
)


In [None]:
# Move version 2 into the "Production" stage. This rebinds the model_version
# and new_stage globals that the previous cells also use.
model_version = 2
new_stage = "Production"
client.transition_model_version_stage(
    name=model_name,
    version=model_version,
    stage=new_stage,
    archive_existing_versions=False  # do not archive versions already in this stage
)


# Get the Model and make predictions

In [None]:
from sklearn.metrics import mean_squared_error
import pandas as pd


def read_dataframe(filename):
    """Load a yellow-taxi trip file and prepare it for scoring.

    Reads the parquet file, makes sure both ``tpep_*`` timestamp columns are
    datetimes, adds a ``duration`` column (trip length in minutes), keeps only
    trips lasting between 1 and 60 minutes (inclusive) and casts the pickup and
    drop-off location ids to strings.

    Args:
        filename: Path (or anything ``pd.read_parquet`` accepts) of the trip file.

    Returns:
        pandas.DataFrame: The filtered trips with the extra ``duration`` column.
    """
    df = pd.read_parquet(filename)

    # Write the parsed timestamps back to the columns they were read from. The
    # previous code assigned them to non-existent ``lpep_*`` attributes, which
    # pandas does not turn into columns, so the conversion was silently lost.
    df['tpep_dropoff_datetime'] = pd.to_datetime(df['tpep_dropoff_datetime'])
    df['tpep_pickup_datetime'] = pd.to_datetime(df['tpep_pickup_datetime'])

    # Vectorized equivalent of applying total_seconds() / 60 to every row.
    trip_length = df['tpep_dropoff_datetime'] - df['tpep_pickup_datetime']
    df['duration'] = trip_length.dt.total_seconds() / 60

    # .copy() yields an independent frame, so the column assignment below (and
    # later ones made by callers) does not trigger SettingWithCopyWarning.
    df = df[(df['duration'] >= 1) & (df['duration'] <= 60)].copy()

    categorical = ['PULocationID', 'DOLocationID']
    df[categorical] = df[categorical].astype(str)

    return df


def preprocess(df, dv):
    """Encode the pickup/drop-off pair of every trip with a vectorizer.

    A combined ``PU_DO`` column ("<PULocationID>_<DOLocationID>") is added to
    ``df`` in place, then every row is turned into a ``{"PU_DO": value}``
    record and the list of records is handed to ``dv.transform``.

    Args:
        df: DataFrame whose ``PULocationID`` and ``DOLocationID`` columns can
            be concatenated with ``'_'`` (i.e. hold strings).
        dv: Object exposing ``transform(list_of_dicts)``.

    Returns:
        Whatever ``dv.transform`` returns for the records.
    """
    feature_columns = ['PU_DO']
    pickup_ids = df['PULocationID']
    dropoff_ids = df['DOLocationID']
    df['PU_DO'] = pickup_ids + '_' + dropoff_ids

    records = df[feature_columns].to_dict(orient='records')
    encoded = dv.transform(records)
    return encoded


def test_model(name, stage, X_test, y_test):
    logged_model='runs:/bf12dedc26054c8b83ff2e3159264b30/models_mlflow'
    run_id = 'bf12dedc26054c8b83ff2e3159264b30'
    # model = mlflow.pyfunc.load_model(logged_model)
    model = mlflow.pyfunc.load_model(f"models:/{name}/{stage}")
    # model = mlflow.pyfunc.load_model(f"runs:/{run_id}/model")
    #y_pred = model.predict(pd.DataFrame(X_test))
    y_pred = model.predict(X_test)
    return {"rmse": mean_squared_error(y_test, y_pred, squared=False)}


In [None]:
# Sanity check: display the registered model name the scoring cell below will
# use (set in an earlier cell to "nyc-taxi-regressor").
model_name

In [None]:
# NOTE(review): machine-specific absolute path — point it at the local copy of
# the yellow_tripdata_2023-02 parquet file before running.
filename = r"C:\Users\LENOVO\Documents\mlops-zoomcamp\mlops-zoomcamp\01-intro\data\yellow_tripdata_2023-02.parquet"
# Load the trips and derive the `duration` column via read_dataframe.
df = read_dataframe(filename)



In [None]:
# Download the run's "preprocessor" artifact into the current working directory
# (the next cell reads it from ./preprocessor).
# NOTE(review): MlflowClient.download_artifacts is reported as deprecated in
# recent MLflow releases in favour of mlflow.artifacts.download_artifacts —
# confirm against the installed version.
client.download_artifacts(run_id=run_id, path='preprocessor', dst_path='.')


In [None]:
import pickle

# Load the pickled preprocessor from the downloaded artifact directory.
# NOTE: pickle.load can execute arbitrary code — only unpickle artifacts that
# come from a tracking server you trust.
with open("preprocessor/preprocessor.b", "rb") as f_in:
    dv = pickle.load(f_in)


In [None]:
# Encode the trips with the loaded preprocessor (this also adds a PU_DO column
# to df) and display the shape of the result.
X_test = preprocess(df, dv)
X_test.shape

In [None]:
# True trip durations in minutes, taken from the same frame X_test was built from.
target = "duration"
y_test = df[target].values


In [None]:
# Time how long it takes to load the "Production" model from the registry and
# score it on the test set; the returned {"rmse": ...} dict is displayed.
%time test_model(name=model_name, stage="Production", X_test=X_test, y_test=y_test)


In [None]:
# Load a model straight from a run's artifacts instead of through the registry.
# NOTE(review): the run id is repeated inline rather than reusing `run_id`, and
# the artifact path is "models_mlflow" whereas the registration cell used
# "model" — confirm both refer to the intended model.
logged_model='runs:/bf12dedc26054c8b83ff2e3159264b30/models_mlflow'
model = mlflow.pyfunc.load_model(logged_model)


In [None]:
# Predict on the whole test set with the run-loaded model.
# NOTE(review): this displays the full prediction output — consider slicing it.
model.predict(X_test)

In [None]:
# Display the registered model name once more.
model_name