In [1]:
import os
import warnings
import sys

import pandas as pd
import numpy as np

from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.model_selection import train_test_split
from sklearn.linear_model import ElasticNet

import mlflow
import mlflow.sklearn

In [14]:
# Quick look at the raw dataset: read the CSV and display ten random rows.
data_path = "datasets/wine-quality.csv"
data = pd.read_csv(data_path)  # comma is read_csv's default separator

data.sample(n=10)

      fixed acidity  volatile acidity  citric acid  residual sugar  chlorides  \
2184            6.4             0.330         0.24             9.8      0.041   
3533            6.6             0.220         0.30            14.7      0.045   
52              6.2             0.160         0.33             1.1      0.057   
2905            7.6             0.310         0.26             1.7      0.073   
2854            6.4             0.210         0.28             5.9      0.047   
3298            6.3             0.240         0.35             2.3      0.039   
2451            6.6             0.560         0.15            10.0      0.037   
207            10.2             0.440         0.88             6.2      0.049   
4835            5.6             0.295         0.26             1.1      0.035   
2088            8.7             0.150         0.30             1.6      0.046   

      free sulfur dioxide  total sulfur dioxide  density    pH  sulphates  \
2184                 29.0      

In [3]:
# Point MLflow at a tracking server. A local one can be started with:
#   mlflow server --backend-store-uri mlruns/ --default-artifact-root mlruns/ --host 0.0.0.0 --port 5000
# If MLFLOW_TRACKING_URI is set (and non-empty) in the environment it wins;
# otherwise fall back to the local server started by the command above.
remote_server_uri = os.environ.get("MLFLOW_TRACKING_URI") or "http://127.0.0.1:5000/"
mlflow.set_tracking_uri(remote_server_uri)

In [4]:
# Sanity check: echo the tracking URI MLflow is currently configured with.
mlflow.get_tracking_uri()

'http://127.0.0.1:5000/'

In [5]:
# Make this experiment the active one; runs started afterwards are filed under it.
exp_name = "ElasticNet_wine"
mlflow.set_experiment(experiment_name=exp_name)

<Experiment: artifact_location='mlruns/1', experiment_id='1', lifecycle_stage='active', name='ElasticNet_wine', tags={}>

In [15]:
def eval_metrics(actual, pred):
    """Score predictions against the true values.

    Args:
        actual: Ground-truth target values.
        pred: Model predictions for the same samples.

    Returns:
        A ``(rmse, mae, r2)`` tuple: the square root of the mean squared
        error, the mean absolute error, and the R^2 score, in that order.
    """
    return (
        np.sqrt(mean_squared_error(actual, pred)),
        mean_absolute_error(actual, pred),
        r2_score(actual, pred),
    )


def load_data(data_path, test_size=0.25, random_state=None):
    """Read the wine-quality CSV and split it into train/test features and targets.

    Args:
        data_path: Path of the comma-separated file to read. It must contain
            a "quality" column, which is used as the prediction target.
        test_size: Share of rows held out for testing, forwarded to
            ``train_test_split``. The default of 0.25 is the same split
            scikit-learn applies when no size is given.
        random_state: Seed forwarded to ``train_test_split``. With the
            default ``None`` the shuffle draws from NumPy's global RNG, so
            the split is only repeatable if the caller seeds that RNG first.

    Returns:
        ``(train_x, train_y, test_x, test_y)``. The ``*_x`` frames hold every
        column except "quality"; the ``*_y`` frames are single-column
        DataFrames holding only "quality".
    """
    data = pd.read_csv(data_path, sep=',')

    # Shuffle and split the rows into training and test sets.
    train_rows, test_rows = train_test_split(
        data, test_size=test_size, random_state=random_state
    )

    # The predicted column is "quality" which is a scalar from [3, 9]
    target = ["quality"]
    train_x = train_rows.drop(target, axis=1)
    test_x = test_rows.drop(target, axis=1)
    # Double-bracket selection keeps the targets as one-column DataFrames.
    train_y = train_rows[target]
    test_y = test_rows[target]
    return train_x, train_y, test_x, test_y

def train(alpha=0.5, l1_ratio=0.5, data_path="datasets/wine-quality.csv"):
    """Fit an ElasticNet on the wine-quality data and record the run in MLflow.

    Each call opens a new MLflow run, logs the hyper-parameters, fits the
    model, prints and logs RMSE / MAE / R2 on the held-out split, and stores
    both the dataset file and the fitted model as run artifacts.

    Args:
        alpha: ElasticNet ``alpha`` (overall penalty strength).
        l1_ratio: ElasticNet ``l1_ratio`` (mix between L1 and L2 penalty).
        data_path: CSV file to train on. The default is a relative path, so
            it is resolved against the current working directory.

    Returns:
        None. Results are printed and logged to the active MLflow experiment.
    """
    # NOTE: this alters the process-wide warning filter, not just this call.
    warnings.filterwarnings("ignore")
    # Seed NumPy's global RNG before the data is split so repeated calls
    # start from the same random state.
    np.random.seed(40)

    train_x, train_y, test_x, test_y = load_data(data_path)

    # One MLflow run per call; the context manager ends the run on exit.
    with mlflow.start_run():
        # Log the hyper-parameters up front so they are attached to the run
        # before any fitting error can occur.
        mlflow.log_param(key="alpha", value=alpha)
        mlflow.log_param(key="l1_ratio", value=l1_ratio)

        # Fit the model
        lr = ElasticNet(alpha=alpha, l1_ratio=l1_ratio, random_state=42)
        lr.fit(train_x, train_y)

        # Evaluate on the held-out split
        predicted_qualities = lr.predict(test_x)
        rmse, mae, r2 = eval_metrics(test_y, predicted_qualities)

        # Print out metrics
        print("Elasticnet model (alpha=%f, l1_ratio=%f):" % (alpha, l1_ratio))
        print("  RMSE: %s" % rmse)
        print("  MAE: %s" % mae)
        print("  R2: %s" % r2)

        # Log metrics, the input dataset, and the fitted model to MLflow
        mlflow.log_metrics({"rmse": rmse, "mae": mae, "r2": r2})
        mlflow.log_artifact(data_path)
        print("Save to: {}".format(mlflow.get_artifact_uri()))

        mlflow.sklearn.log_model(lr, "model")

In [16]:
# Run 1: alpha 0.5 with an even L1/L2 mix.
train(alpha=0.5, l1_ratio=0.5)

Elasticnet model (alpha=0.500000, l1_ratio=0.500000):
  RMSE: 0.8222428497595401
  MAE: 0.6278761410160693
  R2: 0.12678721972772666
Save to: mlruns/1/cb7747421c5e499189a767f9ffb837f6/artifacts


In [17]:
# Run 2: smaller alpha, penalty weighted towards L2.
train(alpha=0.2, l1_ratio=0.2)

Elasticnet model (alpha=0.200000, l1_ratio=0.200000):
  RMSE: 0.7859129997062342
  MAE: 0.6155290394093895
  R2: 0.20224631822892092
Save to: mlruns/1/37d9c6ebac19408aaa0de7e054001432/artifacts


In [18]:
# Run 3: smallest alpha of the three, penalty mostly L2.
train(alpha=0.1, l1_ratio=0.1)

Elasticnet model (alpha=0.100000, l1_ratio=0.100000):
  RMSE: 0.7792546522251949
  MAE: 0.6112547988118587
  R2: 0.2157063843066196
Save to: mlruns/1/95710eedf3d240a5892909b8dba80827/artifacts
