In [16]:
import pandas as pd
import numpy as np
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.model_selection import train_test_split
from sklearn.linear_model import ElasticNet
from mlflow.tracking import MlflowClient
from mlflow.exceptions import MlflowException
import os
import mlflow
import mlflow.sklearn

In [23]:
# os.environ[
#     "MLFLOW_TRACKING_URI"
# ] = "postgresql+psycopg2://postgres:postgres@ml:5432/mlflow_db"
os.environ["MLFLOW_S3_ENDPOINT_URL"] = "http://127.0.0.1:9000"
os.environ["AWS_ACCESS_KEY_ID"] = "baolong"
os.environ["AWS_SECRET_ACCESS_KEY"] = "Admin#123"

In [24]:
experiment_name = "poc_experiment"
MLFLOW_TRACKING_URI = "http://localhost:5000"
mlflow.set_tracking_uri(MLFLOW_TRACKING_URI)

try:
    mlflow.create_experiment(experiment_name, artifact_location="s3://mlflow")
except MlflowException as e:
    print(e)
mlflow.set_experiment(experiment_name)
print((mlflow.get_tracking_uri(), mlflow.get_artifact_uri()))

('http://localhost:5000', 's3://mlflow/ec1757a7524544ee8d3a51594df91da7/artifacts')


In [28]:
def eval_metrics(actual, pred):
    rmse = np.sqrt(mean_squared_error(actual, pred))
    mae = mean_absolute_error(actual, pred)
    r2 = r2_score(actual, pred)
    return rmse, mae, r2


def train(in_alpha, in_l1_ratio):
    np.random.seed(40)

    # Read the wine-quality csv file from the URL
    csv_url = "http://archive.ics.uci.edu/ml/machine-learning-databases/wine-quality/winequality-red.csv"
    data = pd.read_csv(csv_url, sep=";")

    # Split the data into training and test sets. (0.75, 0.25) split.
    train, test = train_test_split(data)

    # The predicted column is "quality" which is a scalar from [3, 9]
    train_x = train.drop(["quality"], axis=1)
    test_x = test.drop(["quality"], axis=1)
    train_y = train[["quality"]]
    test_y = test[["quality"]]

    # Set default values if no alpha is provided
    if float(in_alpha) is None:
        alpha = 0.5
    else:
        alpha = float(in_alpha)

    # Set default values if no l1_ratio is provided
    if float(in_l1_ratio) is None:
        l1_ratio = 0.5
    else:
        l1_ratio = float(in_l1_ratio)

    # Useful for multiple runs
    mlflow.end_run()
    with mlflow.start_run():
        # Execute ElasticNet
        lr = ElasticNet(alpha=alpha, l1_ratio=l1_ratio, random_state=42)
        lr.fit(train_x, train_y)

        # Evaluate Metrics
        predicted_qualities = lr.predict(test_x)
        (rmse, mae, r2) = eval_metrics(test_y, predicted_qualities)

        # Print out metrics
        print("Elasticnet model (alpha=%f, l1_ratio=%f):" % (alpha, l1_ratio))
        print("  RMSE: %s" % rmse)
        print("  MAE: %s" % mae)
        print("  R2: %s" % r2)

        # Log parameter, metrics, and model to MLflow
        mlflow.log_param("alpha", alpha)
        mlflow.log_param("l1_ratio", l1_ratio)
        mlflow.log_metric("rmse", rmse)
        mlflow.log_metric("r2", r2)
        mlflow.log_metric("mae", mae)
        mlflow.sklearn.log_model(lr, "model")

In [29]:
# Run the above training code with different hyperparameters (9 runs)
alphas = [0.25, 0.5, 0.75]
l1_ratios = [0.25, 0.5, 0.75]
for alpha in alphas:
    for l1_ratio in l1_ratios:
        train(alpha, l1_ratio)

Elasticnet model (alpha=0.250000, l1_ratio=0.250000):
  RMSE: 0.7380489682487518
  MAE: 0.5690312554727687
  R2: 0.22820122626467798




Elasticnet model (alpha=0.250000, l1_ratio=0.500000):
  RMSE: 0.748930783857188
  MAE: 0.5806946169417598
  R2: 0.20527460024945354




Elasticnet model (alpha=0.250000, l1_ratio=0.750000):
  RMSE: 0.7662476663327954
  MAE: 0.5985976516559472
  R2: 0.1680982095420568




Elasticnet model (alpha=0.500000, l1_ratio=0.250000):
  RMSE: 0.7596554775612442
  MAE: 0.5913132541174235
  R2: 0.18235068599935977




Elasticnet model (alpha=0.500000, l1_ratio=0.500000):
  RMSE: 0.7931640229276851
  MAE: 0.6271946374319586
  R2: 0.10862644997792614




Elasticnet model (alpha=0.500000, l1_ratio=0.750000):
  RMSE: 0.8318658159940802
  MAE: 0.6651040854928951
  R2: 0.019516509058132292




Elasticnet model (alpha=0.750000, l1_ratio=0.250000):
  RMSE: 0.7837307525653582
  MAE: 0.6165474987409884
  R2: 0.1297029612600864




Elasticnet model (alpha=0.750000, l1_ratio=0.500000):
  RMSE: 0.8318702776765884
  MAE: 0.6651291355677875
  R2: 0.019505991453757976




Elasticnet model (alpha=0.750000, l1_ratio=0.750000):
  RMSE: 0.8331799787336064
  MAE: 0.669234506901795
  R2: 0.016416170929074214




In [30]:
import logging

In [31]:
model_path = "s3://mlflow/17e84dbbb027428a9a5c58f4034a1a72/artifacts/model"

In [32]:
def eval_metrics(actual, pred):
    rmse = np.sqrt(mean_squared_error(actual, pred))
    mae = mean_absolute_error(actual, pred)
    r2 = r2_score(actual, pred)
    return rmse, mae, r2


np.random.seed(40)

# Read the wine-quality csv file from the URL
csv_url = "http://archive.ics.uci.edu/ml/machine-learning-databases/wine-quality/winequality-red.csv"
try:
    data = pd.read_csv(csv_url, sep=";")
except Exception as e:
    logging.exception(
        "Unable to download training & test CSV, check your internet connection. Error: %s",
        e,
    )
# Split the data into training and test sets. (0.75, 0.25) split.
train, test = train_test_split(data)
# The predicted column is "quality" which is a scalar from [3, 9]
train_x = train.drop(["quality"], axis=1)
test_x = test.drop(["quality"], axis=1)
train_y = train[["quality"]]
test_y = test[["quality"]]

# Loading the model
loaded_model = mlflow.sklearn.load_model(model_path)

# Evaluate Metrics
predicted_qualities = loaded_model.predict(test_x)
(rmse, mae, r2) = eval_metrics(test_y, predicted_qualities)

# Print out metrics
print("  RMSE: %s" % rmse)
print("  MAE: %s" % mae)
print("  R2: %s" % r2)

  RMSE: 0.8318658159940802
  MAE: 0.6651040854928951
  R2: 0.019516509058132292
