Proyectos con varios modelos
===

* Última modificación: Mayo 14, 2022

Directorio para almacenar el proyecto
---

In [1]:
#
# Create a folder for the project. The project may also be hosted in a GitHub
# repository.
#
# Any existing `mlruns` entry in the working directory and any previous copy of
# /tmp/example are removed first, so the folder is always created from scratch.
#
!rm -rf mlruns
!rm -rf /tmp/example
!mkdir /tmp/example

Código en Python
---

In [2]:
%%writefile /tmp/example/common.py

# ---------------------------------------------------------------------------------------
#
# Código común a todos los modelos
#
# ---------------------------------------------------------------------------------------


def load_data():
    """Read the concrete dataset and separate predictors from the target.

    Returns:
        tuple: ``(x, y)`` where ``y`` is the ``"strength"`` column of the CSV
        file and ``x`` is a frame holding every other column.
    """
    import pandas as pd

    url = "https://raw.githubusercontent.com/jdvelasq/datalabs/master/datasets/concrete.csv"
    dataset = pd.read_csv(url)

    target = dataset["strength"]
    # drop() returns a new frame, so the frame that was read is left untouched.
    features = dataset.drop(columns=["strength"])

    return features, target


def make_train_test_split(x, y):
    """Split ``x`` and ``y`` into train and test partitions.

    The split keeps 25% of the rows for testing and uses a fixed random state,
    so repeated calls on the same data use the same split settings.

    Returns:
        tuple: ``(x_train, x_test, y_train, y_test)``.
    """
    from sklearn.model_selection import train_test_split

    split_options = {"test_size": 0.25, "random_state": 123456}
    x_train, x_test, y_train, y_test = train_test_split(x, y, **split_options)

    return x_train, x_test, y_train, y_test


def eval_metrics(y_true, y_pred):
    """Score predictions against the observed values.

    Returns:
        tuple: ``(mse, mae, r2)`` computed, in that order, with scikit-learn's
        ``mean_squared_error``, ``mean_absolute_error`` and ``r2_score``.
    """
    from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

    return (
        mean_squared_error(y_true, y_pred),
        mean_absolute_error(y_true, y_pred),
        r2_score(y_true, y_pred),
    )


def report(estimator, mse, mae, r2):
    """Print the estimator followed by its MSE, MAE and R2 values.

    The first line is the estimator's string form with a trailing colon; each
    metric follows on its own line, indented by two spaces.
    """
    lines = [
        str(estimator) + ":",
        f"  MSE: {mse}",
        f"  MAE: {mae}",
        f"  R2: {r2}",
    ]
    print("\n".join(lines))


def log_metrics(mse, mae, r2):
    """Record the three evaluation metrics with ``mlflow.log_metric``.

    The metrics are logged under the keys ``"mse"``, ``"mae"`` and ``"r2"``,
    in that order.
    """
    import mlflow

    for metric_name, metric_value in (("mse", mse), ("mae", mae), ("r2", r2)):
        mlflow.log_metric(metric_name, metric_value)


def make_pipeline(estimator):
    """Wrap ``estimator`` in a pipeline that applies ``MinMaxScaler`` first.

    Returns:
        Pipeline: two steps, named ``"minMaxScaler"`` and ``"estimator"``.
    """
    from sklearn.pipeline import Pipeline
    from sklearn.preprocessing import MinMaxScaler

    scaling_step = ("minMaxScaler", MinMaxScaler())
    model_step = ("estimator", estimator)

    return Pipeline(steps=[scaling_step, model_step])

Writing /tmp/example/common.py


Modelo de regresión lineal
---

In [3]:
%%writefile /tmp/example/run_linear_regression.py


def run():
    """Train, evaluate and log a linear-regression pipeline in one MLflow run.

    The data is loaded and split with the helpers in ``common``, the pipeline
    is fitted with scikit-learn autologging enabled, and the test-set MSE, MAE
    and R2 are logged and printed.
    """
    import common
    from sklearn.linear_model import LinearRegression

    import mlflow

    x, y = common.load_data()
    x_train, x_test, y_train, y_test = common.make_train_test_split(x, y)

    mlflow.sklearn.autolog()

    estimator = common.make_pipeline(
        estimator=LinearRegression(),
    )

    with mlflow.start_run():
        estimator.fit(x_train, y_train)

        # The test metrics must be logged while the run is still active.
        # Once the `with` block exits the run is closed, and a later
        # log_metric call would go to whatever run MLflow opens implicitly
        # rather than to the run that holds the fitted model.
        mse, mae, r2 = common.eval_metrics(
            y_true=y_test,
            y_pred=estimator.predict(x_test),
        )
        common.log_metrics(mse, mae, r2)

    common.report(estimator, mse, mae, r2)


# Execute the experiment only when this file is run as a script, not on import.
if __name__ == "__main__":
    run()

Writing /tmp/example/run_linear_regression.py


Modelo de regresión con vecinos más cercanos
---

In [4]:
%%writefile /tmp/example/run_k_neighbors_regressor.py


def run():
    """Train, evaluate and log a k-nearest-neighbors pipeline in one MLflow run.

    The number of neighbors is read from the first command-line argument
    (``sys.argv[1]``) and converted to ``int``. The data is loaded and split
    with the helpers in ``common``, the pipeline is fitted with scikit-learn
    autologging enabled, and the test-set MSE, MAE and R2 are logged and
    printed.

    Raises:
        IndexError: if no command-line argument is given.
        ValueError: if the argument cannot be converted to ``int``.
    """
    import sys

    import common
    from sklearn.neighbors import KNeighborsRegressor

    import mlflow

    x, y = common.load_data()
    x_train, x_test, y_train, y_test = common.make_train_test_split(x, y)

    n_neighbors = int(sys.argv[1])

    mlflow.sklearn.autolog()

    estimator = common.make_pipeline(
        estimator=KNeighborsRegressor(n_neighbors=n_neighbors),
    )

    with mlflow.start_run():
        estimator.fit(x_train, y_train)

        # The test metrics must be logged while the run is still active.
        # Once the `with` block exits the run is closed, and a later
        # log_metric call would go to whatever run MLflow opens implicitly
        # rather than to the run that holds the fitted model.
        mse, mae, r2 = common.eval_metrics(
            y_true=y_test,
            y_pred=estimator.predict(x_test),
        )
        common.log_metrics(mse, mae, r2)

    common.report(estimator, mse, mae, r2)


# Execute the experiment only when this file is run as a script, not on import.
if __name__ == "__main__":
    run()

Writing /tmp/example/run_k_neighbors_regressor.py


MLproject
---

In [5]:
%%writefile /tmp/example/MLproject
# Project name.
name: proyecto de demostracion

entry_points:
    # Entry point without parameters: runs the linear-regression script.
    linear_regression:
        command: 'python3 run_linear_regression.py'
        
        
    # Entry point with one integer parameter (default 5) whose value replaces
    # {n_neighbors} in the command line.
    k_neighbors_regressor:
        parameters:
            n_neighbors: {type: integer, default: 5}
        command: 'python3 run_k_neighbors_regressor.py {n_neighbors}'
        

Writing /tmp/example/MLproject


Creación de los experimentos
---

In [6]:
# Create the experiment named `linear_runs`.
!mlflow experiments create -n linear_runs

Created experiment 'linear_runs' with id 1


In [7]:
# Create the experiment named `knn_runs`.
!mlflow experiments create -n knn_runs

Created experiment 'knn_runs' with id 2


Ejecución de los experimentos
---

In [8]:
#
# Ejecución del modelo de regresión lineal
#
!mlflow run --env-manager=local -e linear_regression --experiment-id 1 /tmp/example

2022/06/04 03:26:25 INFO mlflow.projects.utils: === Created directory /tmp/tmph275gi_y for downloading remote URIs passed to arguments of type 'path' ===
2022/06/04 03:26:25 INFO mlflow.projects.backend.local: === Running command 'python3 run_linear_regression.py' in run with ID 'c5fe221f5585414e9653e2f1b31ea5b3' === 
Pipeline(steps=[('minMaxScaler', MinMaxScaler()),
                ('estimator', LinearRegression())]):
  MSE: 117.25636031414096
  MAE: 8.526872668000976
  R2: 0.6007675607096427
2022/06/04 03:26:29 INFO mlflow.projects: === Run (ID 'c5fe221f5585414e9653e2f1b31ea5b3') succeeded ===


In [9]:
#
# Ejecución del modelo de vecinos más cercanos
#
for n in range(1, 5):
    print()
    print(f"----------------------------- neighbors = {n} -----------------------------")
    !mlflow run --env-manager=local -e k_neighbors_regressor --experiment-id 2  -P n_neighbors={n}  /tmp/example
    print()
    


----------------------------- neighbors = 1 -----------------------------
2022/06/04 03:26:30 INFO mlflow.projects.utils: === Created directory /tmp/tmpt4sh7i1i for downloading remote URIs passed to arguments of type 'path' ===
2022/06/04 03:26:30 INFO mlflow.projects.backend.local: === Running command 'python3 run_k_neighbors_regressor.py 1' in run with ID '0fdb31798713414bbbee9b90fbb08c04' === 
Pipeline(steps=[('minMaxScaler', MinMaxScaler()),
                ('estimator', KNeighborsRegressor(n_neighbors=1))]):
  MSE: 85.1262027131783
  MAE: 6.625852713178294
  R2: 0.7101637688082918
2022/06/04 03:26:34 INFO mlflow.projects: === Run (ID '0fdb31798713414bbbee9b90fbb08c04') succeeded ===


----------------------------- neighbors = 2 -----------------------------
2022/06/04 03:26:36 INFO mlflow.projects.utils: === Created directory /tmp/tmpmc9cmwbl for downloading remote URIs passed to arguments of type 'path' ===
2022/06/04 03:26:36 INFO mlflow.projects.backend.local: === Running comm

MLflow ui
---

Para visualizar la interfaz use:

```bash
mlflow ui
```

**Nota:** En docker usar:

```bash
mlflow ui --host 0.0.0.0 
``` 

y luego abra en el navegador:

http://127.0.0.1:5001


![Experimentos en la interfaz de MLflow](assets/mlflow-project-4-experiments-part-0.png)

**Detalles de la corrida**

![Detalles de la corrida, parte 1](assets/mlflow-project-4-experiments-part-1.png)
![Detalles de la corrida, parte 2](assets/mlflow-project-4-experiments-part-2.png)