API Workflow
===

* Última modificación: Mayo 14, 2022

Directorio para almacenar el proyecto
---

In [1]:
#
# Borra el directorio de trabajo
#
!rm -rf mlruns
!rm mlruns.db

Codebase
---

In [2]:
def load_data():
    """Download the concrete dataset and separate features from the target.

    Returns:
        tuple: ``(x, y)`` where ``y`` is the ``strength`` column and ``x`` is
        a frame with every other column (``age`` is cast to float).
    """
    import pandas as pd

    source = "https://raw.githubusercontent.com/jdvelasq/datalabs/master/datasets/concrete.csv"
    dataset = pd.read_csv(source).astype({"age": "float"})

    target = dataset["strength"]
    # drop() returns a new frame, so the downloaded frame is left untouched.
    features = dataset.drop(columns=["strength"])

    return features, target


def make_train_test_split(x, y, test_size=0.25, random_state=123456):
    """Split features and target into train and test partitions.

    Args:
        x: Feature matrix.
        y: Target values aligned with ``x``.
        test_size: Fraction (or count) of samples held out for testing.
            Defaults to the notebook's original value, 0.25.
        random_state: Seed forwarded to scikit-learn so the split is
            reproducible. Defaults to the notebook's original value, 123456.

    Returns:
        tuple: ``(x_train, x_test, y_train, y_test)``.
    """
    from sklearn.model_selection import train_test_split

    x_train, x_test, y_train, y_test = train_test_split(
        x,
        y,
        test_size=test_size,
        random_state=random_state,
    )
    return x_train, x_test, y_train, y_test


def eval_metrics(y_true, y_pred):
    """Score predictions against the ground truth.

    Args:
        y_true: Observed target values.
        y_pred: Predicted target values.

    Returns:
        tuple: ``(mse, mae, r2)`` in that order.
    """
    from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

    # Order matters: callers unpack the result as (mse, mae, r2).
    scorers = (mean_squared_error, mean_absolute_error, r2_score)
    return tuple(scorer(y_true, y_pred) for scorer in scorers)


def report(estimator, mse, mae, r2):
    """Print the estimator followed by its MSE, MAE and R2, one per line."""
    lines = [
        str(estimator) + ":",
        f"  MSE: {mse}",
        f"  MAE: {mae}",
        f"  R2: {r2}",
    ]
    print("\n".join(lines))


# def log_metrics(mse, mae, r2):
# 
#     import mlflow
# 
#     mlflow.log_metric("mse", mse)
#     mlflow.log_metric("mae", mae)
#     mlflow.log_metric("r2", r2)


def make_pipeline(estimator):
    """Build a pipeline that min-max scales the inputs before ``estimator``.

    Args:
        estimator: Final step of the pipeline.

    Returns:
        Pipeline: Steps named ``"minMaxScaler"`` and ``"estimator"``.
    """
    from sklearn.pipeline import Pipeline
    from sklearn.preprocessing import MinMaxScaler

    scaling_step = ("minMaxScaler", MinMaxScaler())
    model_step = ("estimator", estimator)

    return Pipeline(steps=[scaling_step, model_step])


def set_tracking_uri():
    """Point MLflow tracking at the local ``mlruns.db`` SQLite database."""
    import mlflow

    tracking_uri = 'sqlite:///mlruns.db'
    mlflow.set_tracking_uri(tracking_uri)
    

def display_config():
    """Print the model-registry URI and the tracking URI MLflow is using."""
    import mlflow

    registry_uri = mlflow.get_registry_uri()
    print(f"Current model registry uri: {registry_uri}")

    tracking_uri = mlflow.get_tracking_uri()
    print(f"      Current tracking uri: {tracking_uri}")
    

Adición del modelo con mlflow.\<model_flavor\>.log_model()
---

In [3]:
def make_k_neighbors_regressor(n_neighbors):
    """Train a scaled k-NN regressor and register it while logging the model.

    The model is logged with ``mlflow.sklearn.log_model`` and, because
    ``registered_model_name`` is passed, registered in the same call.

    Args:
        n_neighbors: Number of neighbors for ``KNeighborsRegressor``; also
            embedded in the registered model name.
    """
    from sklearn.neighbors import KNeighborsRegressor

    import mlflow

    features, target = load_data()
    x_train, x_test, y_train, y_test = make_train_test_split(features, target)

    pipeline = make_pipeline(
        estimator=KNeighborsRegressor(n_neighbors=n_neighbors),
    )

    set_tracking_uri()

    with mlflow.start_run(run_name="DEMO_RUN") as active_run:

        print(f"MLflow run ID: {active_run.info.run_id}")

        pipeline.fit(x_train, y_train)

        predictions = pipeline.predict(x_test)
        mse, mae, r2 = eval_metrics(y_true=y_test, y_pred=predictions)

        report(pipeline, mse, mae, r2)

        #
        # Parameters and metrics
        #
        mlflow.log_param('n_neighbors', n_neighbors)
        for metric_name, metric_value in (("mse", mse), ("mae", mae), ("r2", r2)):
            mlflow.log_metric(metric_name, metric_value)

        #
        # Log the model and register it under the given name in one step
        #
        registry_name = f"sklearn-{n_neighbors}-neighbors-regressor-model"
        mlflow.sklearn.log_model(
            sk_model=pipeline,
            artifact_path="model",
            registered_model_name=registry_name,
        )


make_k_neighbors_regressor(n_neighbors=5)

2022/06/03 22:49:22 INFO mlflow.store.db.utils: Creating initial MLflow database tables...
2022/06/03 22:49:22 INFO mlflow.store.db.utils: Updating database tables
INFO  [alembic.runtime.migration] Context impl SQLiteImpl.
INFO  [alembic.runtime.migration] Will assume non-transactional DDL.
INFO  [alembic.runtime.migration] Running upgrade  -> 451aebb31d03, add metric step
INFO  [alembic.runtime.migration] Running upgrade 451aebb31d03 -> 90e64c465722, migrate user column to tags
INFO  [alembic.runtime.migration] Running upgrade 90e64c465722 -> 181f10493468, allow nulls for metric values
INFO  [alembic.runtime.migration] Running upgrade 181f10493468 -> df50e92ffc5e, Add Experiment Tags Table
INFO  [alembic.runtime.migration] Running upgrade df50e92ffc5e -> 7ac759974ad8, Update run tags with larger limit
INFO  [alembic.runtime.migration] Running upgrade 7ac759974ad8 -> 89d4b8295536, create latest metrics table
INFO  [89d4b8295536_create_latest_metrics_table_py] Migration complete!
INFO  

MLflow run ID: 1e8edaa87d7b495f92bd5ad32045a7ea
Pipeline(steps=[('minMaxScaler', MinMaxScaler()),
                ('estimator', KNeighborsRegressor())]):
  MSE: 85.34441046511628
  MAE: 7.084488372093023
  R2: 0.7094208188067321


Successfully registered model 'sklearn-5-neighbors-regressor-model'.
2022/06/03 22:49:25 INFO mlflow.tracking._model_registry.client: Waiting up to 300 seconds for model version to finish creation.                     Model name: sklearn-5-neighbors-regressor-model, version 1
Created version '1' of model 'sklearn-5-neighbors-regressor-model'.


Adición del modelo con mlflow.register_model()
---

In [4]:
def make_k_neighbors_regressor(n_neighbors):
    """Train a scaled k-NN regressor, log it, then register it by run URI.

    The model is first logged as a run artifact under ``"model"`` and then
    registered with ``mlflow.register_model`` using a ``runs:/`` URI.

    Args:
        n_neighbors: Number of neighbors for ``KNeighborsRegressor``; also
            embedded in the registered model name.
    """
    from sklearn.neighbors import KNeighborsRegressor

    import mlflow

    x, y = load_data()
    x_train, x_test, y_train, y_test = make_train_test_split(x, y)

    estimator = make_pipeline(
        estimator=KNeighborsRegressor(n_neighbors=n_neighbors),
    )

    set_tracking_uri()

    with mlflow.start_run(run_name="DEMO_RUN") as run:

        print(f"MLflow run ID: {run.info.run_id}")

        estimator.fit(x_train, y_train)

        mse, mae, r2 = eval_metrics(
            y_true=y_test,
            y_pred=estimator.predict(x_test),
        )

        report(estimator, mse, mae, r2)

        #
        # Parameters and metrics
        #
        mlflow.log_param('n_neighbors', n_neighbors)
        mlflow.log_metric("mse", mse)
        mlflow.log_metric("mae", mae)
        mlflow.log_metric("r2", r2)

        #
        # Log the model as a run artifact
        #
        artifact_path = "model"
        mlflow.sklearn.log_model(estimator, artifact_path)

        #
        # After many runs, a chosen model is registered through its run_id.
        # The URI must include the artifact path used in log_model:
        # "runs:/<run_id>" alone points at the run's artifact root, not at
        # the directory that holds the logged model.
        #
        mlflow.register_model(
            f"runs:/{run.info.run_id}/{artifact_path}",
            f"sklearn-{n_neighbors}-neighbors-regressor-model",
        )


make_k_neighbors_regressor(n_neighbors=5)

MLflow run ID: afd912e3c5cb46c093e05bfd3d79173c
Pipeline(steps=[('minMaxScaler', MinMaxScaler()),
                ('estimator', KNeighborsRegressor())]):
  MSE: 85.34441046511628
  MAE: 7.084488372093023
  R2: 0.7094208188067321


Registered model 'sklearn-5-neighbors-regressor-model' already exists. Creating a new version of this model...
2022/06/03 22:49:27 INFO mlflow.tracking._model_registry.client: Waiting up to 300 seconds for model version to finish creation.                     Model name: sklearn-5-neighbors-regressor-model, version 2
Created version '2' of model 'sklearn-5-neighbors-regressor-model'.


Adición del modelo con MlflowClient.create_model_version()
---

In [5]:
def make_k_neighbors_regressor(n_neighbors):
    """Train a scaled k-NN regressor, log it, then register it via MlflowClient.

    The model is logged as a run artifact under ``"model"``. The registered
    model is created if it does not exist yet, and a new version pointing at
    the logged artifact is added with ``MlflowClient.create_model_version``.

    Args:
        n_neighbors: Number of neighbors for ``KNeighborsRegressor``; also
            embedded in the registered model name.
    """
    from sklearn.neighbors import KNeighborsRegressor

    import mlflow
    from mlflow.exceptions import MlflowException
    from mlflow.tracking import MlflowClient

    x, y = load_data()
    x_train, x_test, y_train, y_test = make_train_test_split(x, y)

    estimator = make_pipeline(
        estimator=KNeighborsRegressor(n_neighbors=n_neighbors),
    )

    set_tracking_uri()

    with mlflow.start_run(run_name="DEMO_RUN") as run:

        print(f"MLflow run ID: {run.info.run_id}")

        estimator.fit(x_train, y_train)

        mse, mae, r2 = eval_metrics(
            y_true=y_test,
            y_pred=estimator.predict(x_test),
        )

        report(estimator, mse, mae, r2)

        #
        # Parameters and metrics
        #
        mlflow.log_param("n_neighbors", n_neighbors)
        mlflow.log_metric("mse", mse)
        mlflow.log_metric("mae", mae)
        mlflow.log_metric("r2", r2)

        #
        # Log the model as a run artifact
        #
        artifact_path = "model"
        mlflow.sklearn.log_model(estimator, artifact_path)

        #
        # After many runs, a chosen model is registered through its run_id.
        #
        model_name = f"sklearn-{n_neighbors}-neighbors-regressor-model"
        client = MlflowClient()

        # create_model_version needs the registered model to exist; create it
        # on first use and ignore only the "already exists" error.
        try:
            client.create_registered_model(model_name)
        except MlflowException as exc:
            if exc.error_code not in ("RESOURCE_ALREADY_EXISTS", "ALREADY_EXISTS"):
                raise

        # Build the source from the run's real artifact URI instead of
        # hard-coding "mlruns/0/...", which breaks for any experiment other
        # than id 0 or any non-default artifact location.
        client.create_model_version(
            name=model_name,
            source=f"{run.info.artifact_uri}/{artifact_path}",
            run_id=run.info.run_id,
        )


make_k_neighbors_regressor(n_neighbors=5)

MLflow run ID: 169b0fe18316405992abf4e5b08db6ef
Pipeline(steps=[('minMaxScaler', MinMaxScaler()),
                ('estimator', KNeighborsRegressor())]):
  MSE: 85.34441046511628
  MAE: 7.084488372093023
  R2: 0.7094208188067321


2022/06/03 22:49:29 INFO mlflow.tracking._model_registry.client: Waiting up to 300 seconds for model version to finish creation.                     Model name: sklearn-5-neighbors-regressor-model, version 3


Carga de una versión específica del modelo
---

In [6]:
#
# Load a specific version of the registered model
#
def predict():
    """Predict the first ten test rows with version 1 of the registered model.

    Returns:
        The output of the loaded pyfunc model's ``predict`` for those rows.
    """
    import mlflow

    features, target = load_data()
    _, x_test, _, _ = make_train_test_split(features, target)

    registry_name = "sklearn-5-neighbors-regressor-model"
    version_number = 1
    model_uri = f"models:/{registry_name}/{version_number}"

    loaded_model = mlflow.pyfunc.load_model(model_uri=model_uri)

    return loaded_model.predict(x_test[:10])


predict()

array([38.838, 15.852, 11.67 , 26.634, 32.11 , 27.81 , 24.602, 18.056,
       18.93 , 42.354])

Cambio manual del estado de un modelo
---

* None (cuando se crea)

* Staging

* Production

* Archived

Se entra a mlflow ui y se cambia el estado del modelo.

Para visualizar la interfaz use:

```bash
mlflow ui --backend-store-uri sqlite:///mlruns.db
```

**Nota:** En docker usar:

```bash
mlflow ui --host 0.0.0.0 --backend-store-uri sqlite:///mlruns.db
``` 

con: 

http://127.0.0.1:5001


**Paso 1**

![assets/mlflow-registry-api-workflow-part-0.png](assets/mlflow-registry-api-workflow-part-0.png)

**Paso 2**

![assets/mlflow-registry-api-workflow-part-1.png](assets/mlflow-registry-api-workflow-part-1.png)

**Paso 3**

![assets/mlflow-registry-api-workflow-part-2.png](assets/mlflow-registry-api-workflow-part-2.png)

**Paso 4**

![assets/mlflow-registry-api-workflow-part-3.png](assets/mlflow-registry-api-workflow-part-3.png)

**Paso 5**

![assets/mlflow-registry-api-workflow-part-4.png](assets/mlflow-registry-api-workflow-part-4.png)

Carga de un modelo por estado
---

In [7]:
def predict():
    """Predict the first ten test rows with the model currently in Staging.

    ``mlflow.pyfunc.load_model`` raises ``MlflowException`` when no version
    of the registered model is in the ``Staging`` stage.

    Returns:
        The output of the loaded pyfunc model's ``predict`` for those rows.
    """
    import mlflow

    features, target = load_data()
    splits = make_train_test_split(features, target)
    x_test = splits[1]

    registry_name = "sklearn-5-neighbors-regressor-model"
    target_stage = 'Staging'

    staged_model = mlflow.pyfunc.load_model(
        model_uri=f"models:/{registry_name}/{target_stage}"
    )

    first_rows = x_test[0:10]
    return staged_model.predict(first_rows)


predict()

MlflowException: No versions of model with name 'sklearn-5-neighbors-regressor-model' and stage 'Staging' found

Puesta en producción como una API
---

```bash
#!/usr/bin/env sh

export MLFLOW_TRACKING_URI=http://localhost:5000

mlflow models serve -m "models:/sklearn-5-neighbors-regressor-model/Production"

```

Cambio del estado de un modelo por código
---

In [None]:
def change_name():
    """Move version 1 of the registered 5-neighbors model to Production.

    Despite its name, this function changes the version's stage, not the
    model's name.
    """
    import mlflow

    registry_client = mlflow.tracking.MlflowClient()

    # Accepted stages: Staging | Archived | Production | None
    transition = {
        "name": "sklearn-5-neighbors-regressor-model",
        "version": 1,
        "stage": "Production",
    }
    registry_client.transition_model_version_stage(**transition)


change_name()

Listado y búsqueda de modelos
---

In [None]:
def list_models():
    """Pretty-print every registered model known to the model registry."""
    from pprint import pprint

    import mlflow

    client = mlflow.tracking.MlflowClient()

    # list_registered_models() is deprecated and removed in MLflow 2.x;
    # search_registered_models() with no filter returns the registered models.
    for rm in client.search_registered_models():
        pprint(dict(rm), indent=4)


list_models()

In [None]:
def search_models():
    """Pretty-print every version of the registered 5-neighbors model."""
    from pprint import pprint

    import mlflow

    query = "name='sklearn-5-neighbors-regressor-model'"
    versions = mlflow.tracking.MlflowClient().search_model_versions(query)

    for version in versions:
        pprint(dict(version), indent=4)
        print()  # blank line between versions

search_models()