In [13]:
#Carga de datos

In [14]:
def load_data(url=None):
    """Load the wine-quality data and separate the features from the target.

    Parameters
    ----------
    url : str, path or file-like object, optional
        Source of a semicolon-separated CSV that has a ``quality`` column.
        It is handed straight to ``pandas.read_csv``. When omitted, the
        red-wine quality file hosted at archive.ics.uci.edu is downloaded.

    Returns
    -------
    x : pandas.DataFrame
        Every column of the file except ``quality``.
    y : pandas.Series
        The ``quality`` column.

    Raises
    ------
    KeyError
        If the loaded table has no ``quality`` column.
    """
    import pandas as pd

    if url is None:
        # Default source; requires network access.
        url = "http://archive.ics.uci.edu/ml/machine-learning-databases/wine-quality/winequality-red.csv"

    df = pd.read_csv(url, sep=";")

    y = df["quality"]
    # drop() returns a new frame, so the loaded table itself is not modified.
    x = df.drop(columns=["quality"])

    return x, y

In [15]:
#Particionamiento de los datos

In [16]:
def make_train_test_split(x, y):
    """Partition the features and target into train and test subsets.

    The split is delegated to ``sklearn.model_selection.train_test_split``
    with ``test_size=0.25`` and a fixed ``random_state`` so that repeated
    calls use the same seed.

    Parameters
    ----------
    x : features to split.
    y : target values to split alongside ``x``.

    Returns
    -------
    tuple
        ``(x_train, x_test, y_train, y_test)``.
    """
    from sklearn.model_selection import train_test_split

    split_options = {"test_size": 0.25, "random_state": 123456}
    x_train, x_test, y_train, y_test = train_test_split(x, y, **split_options)

    return x_train, x_test, y_train, y_test

In [17]:
#Cálculo de métricas de evaluación

In [18]:
def eval_metrics(y_true, y_pred):
    """Score predictions against the observed values.

    Parameters
    ----------
    y_true : observed target values.
    y_pred : predicted target values.

    Returns
    -------
    tuple
        ``(mse, mae, r2)``: the results of scikit-learn's
        ``mean_squared_error``, ``mean_absolute_error`` and ``r2_score``,
        in that order.
    """
    from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

    # Order matters: callers unpack the result as (mse, mae, r2).
    scorers = (mean_squared_error, mean_absolute_error, r2_score)

    return tuple(scorer(y_true, y_pred) for scorer in scorers)

In [19]:
#Reporte de métricas de evaluación

In [20]:
def report(estimator, mse, mae, r2):
    """Print the estimator followed by its evaluation metrics.

    The first line is ``str(estimator)`` with a trailing colon; each metric
    follows on its own line, indented by two spaces.

    Parameters
    ----------
    estimator : object whose string form heads the report.
    mse, mae, r2 : metric values printed under the labels MSE, MAE and R2.
    """
    print(str(estimator) + ":")
    for label, value in (("MSE", mse), ("MAE", mae), ("R2", r2)):
        print(f"  {label}: {value}")

In [21]:
#Entrenamiento

In [22]:
def train_estimator(alpha=0.5, l1_ratio=0.5, verbose=1):
    """Fit an ElasticNet model and record the run with MLflow.

    The data comes from ``load_data`` and is partitioned by
    ``make_train_test_split``. The fitted model is scored on the test
    partition with ``eval_metrics``; the hyperparameters, the metrics and
    the model itself are then logged inside a new MLflow run.

    Parameters
    ----------
    alpha : float, default 0.5
        Forwarded to ``ElasticNet(alpha=...)`` and logged as a parameter.
    l1_ratio : float, default 0.5
        Forwarded to ``ElasticNet(l1_ratio=...)`` and logged as a parameter.
    verbose : int, default 1
        When greater than zero, the metrics are printed through ``report``.

    Returns
    -------
    None
    """
    import mlflow
    import mlflow.sklearn
    from sklearn.linear_model import ElasticNet

    features, target = load_data()
    x_train, x_test, y_train, y_test = make_train_test_split(features, target)

    tracking_uri = mlflow.get_tracking_uri()
    print('Tracking directory:', tracking_uri)

    # NOTE(review): the outputs recorded in this notebook show an
    # MlflowException about the 'file' tracking scheme being used with the
    # mlflow-artifacts proxy, raised after the metrics were printed. That
    # looks like a tracking/artifact configuration issue -- confirm.
    with mlflow.start_run():

        estimator = ElasticNet(alpha=alpha, l1_ratio=l1_ratio, random_state=12345)
        estimator.fit(x_train, y_train)

        predictions = estimator.predict(x_test)
        mse, mae, r2 = eval_metrics(y_test, y_pred=predictions)
        if verbose > 0:
            report(estimator, mse, mae, r2)

        # Parameter tracking
        for param_name, param_value in (("alpha", alpha), ("l1_ratio", l1_ratio)):
            mlflow.log_param(param_name, param_value)

        # Metric tracking
        for metric_name, metric_value in (("mse", mse), ("mae", mae), ("r2", r2)):
            mlflow.log_metric(metric_name, metric_value)

        # Model tracking
        mlflow.sklearn.log_model(estimator, "model")

    # -------------------------------------------------------------------------
    # No longer required with MLflow
    # -------------------------------------------------------------------------
    #
    # best_estimator = load_best_estimator()
    # if best_estimator is None or estimator.score(x_test, y_test) > best_estimator.score(
    #     x_test, y_test
    # ):
    #     best_estimator = estimator
    #
    # save_best_estimator(best_estimator)

In [23]:
#Búsqueda manual de los mejores hiperparámetros

In [24]:
# Manual trial with alpha=0.5 and l1_ratio=0.5.
train_estimator(alpha=0.5, l1_ratio=0.5)

Tracking directory: file:///home/elicoubuntu/Producto_de_Datos/Demos/Mlflow-demo-3/mlruns
ElasticNet(alpha=0.5, random_state=12345):
  MSE: 0.6349429447805036
  MAE: 0.6453803508338732
  R2: 0.0890018368226928


MlflowException: The configured tracking uri scheme: 'file' is invalid for use with the proxy mlflow-artifact scheme. The allowed tracking schemes are: {'http', 'https'}

In [25]:
# Manual trial with alpha=0.2 and l1_ratio=0.2.
train_estimator(alpha=0.2, l1_ratio=0.2)

Tracking directory: file:///home/elicoubuntu/Producto_de_Datos/Demos/Mlflow-demo-3/mlruns
ElasticNet(alpha=0.2, l1_ratio=0.2, random_state=12345):
  MSE: 0.5170837474931838
  MAE: 0.5701436798648394
  R2: 0.2581028767270219


MlflowException: The configured tracking uri scheme: 'file' is invalid for use with the proxy mlflow-artifact scheme. The allowed tracking schemes are: {'http', 'https'}