In [1]:
!pip install mlflow

Collecting mlflow
  Downloading https://files.pythonhosted.org/packages/aa/2d/7fa1f6e310ded489d943ea20cd7977a9867cb8d81b526d9c9460ce4a5b39/mlflow-1.11.0-py3-none-any.whl (13.9MB)
    100% |████████████████████████████████| 13.9MB 1.6MB/s eta 0:00:01
Collecting querystring-parser (from mlflow)
  Downloading https://files.pythonhosted.org/packages/4a/fa/f54f5662e0eababf0c49e92fd94bf178888562c0e7b677c8941bbbcd1bd6/querystring_parser-1.2.4.tar.gz
Collecting sqlparse (from mlflow)
  Downloading https://files.pythonhosted.org/packages/85/ee/6e821932f413a5c4b76be9c5936e313e4fc626b33f16e027866e1d60f588/sqlparse-0.3.1-py2.py3-none-any.whl (40kB)
    100% |████████████████████████████████| 40kB 4.5MB/s eta 0:00:01
Collecting databricks-cli>=0.8.7 (from mlflow)
  Downloading https://files.pythonhosted.org/packages/1e/57/5c2d6b83cb8753

    100% |████████████████████████████████| 51kB 8.8MB/s eta 0:00:01
Building wheels for collected packages: querystring-parser, databricks-cli, alembic, prometheus-flask-exporter
  Running setup.py bdist_wheel for querystring-parser ... done
  Stored in directory: /Users/ben/Library/Caches/pip/wheels/1e/41/34/23ebf5d1089a9aed847951e0ee375426eb4ad0a7079d88d41e
  Running setup.py bdist_wheel for databricks-cli ... done
  Stored in directory: /Users/ben/Library/Caches/pip/wheels/63/d0/4f/3deeca1f4c47a6aca7c2c6a6e2bf272391565dc86a7718a59b
  Running setup.py bdist_wheel for alembic ... done
  Stored in directory: /Users/ben/Library/Caches/pip/wheels/84/07/f7/12f7370ca47a66030c2edeedcc23dec26ea0ac22dcb4c4a0f3
  Running setup.py bdist_wheel for prometheus-flask-exporter ... done
  Stored in directory: /Users/ben/Library/Caches/pip/wheels/f0/ba/c4/62b4809cae9e7823ae296909e887c1b60eade9415ca4a286c2
Successfully built querystring-parser databri

In [None]:
# This notebook is adapted from the MLflow example at https://github.com/mlflow/mlflow/blob/master/examples/sklearn_elasticnet_wine/train.ipynb

In [9]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split

# Location of the red-wine quality CSV on the UCI archive. The file is
# semicolon-delimited, hence the explicit separator below.
csv_url = (
    'http://archive.ics.uci.edu/ml/machine-learning-databases/'
    'wine-quality/winequality-red.csv'
)
data = pd.read_csv(csv_url, sep=';')

In [10]:
# Preview the first five rows to check that the CSV parsed into columns.
data.head(n=5)

Unnamed: 0,fixed acidity,volatile acidity,citric acid,residual sugar,chlorides,free sulfur dioxide,total sulfur dioxide,density,pH,sulphates,alcohol,quality
0,7.4,0.7,0.0,1.9,0.076,11.0,34.0,0.9978,3.51,0.56,9.4,5
1,7.8,0.88,0.0,2.6,0.098,25.0,67.0,0.9968,3.2,0.68,9.8,5
2,7.8,0.76,0.04,2.3,0.092,15.0,54.0,0.997,3.26,0.65,9.8,5
3,11.2,0.28,0.56,1.9,0.075,17.0,60.0,0.998,3.16,0.58,9.8,6
4,7.4,0.7,0.0,1.9,0.076,11.0,34.0,0.9978,3.51,0.56,9.4,5


In [11]:
# Split the data into training and test sets. (0.75, 0.25) split.
# The predicted column is "quality" which is a scalar from [3, 9]
# random_state pins the shuffle so the split -- and every metric computed
# from it -- is the same after Restart Kernel -> Run All. Without it the
# split changes on each run, because no seed has been set at this point.
train_x, test_x, train_y, test_y = train_test_split(
    data.drop(columns=['quality']),
    data['quality'],
    test_size=0.25,
    random_state=40,
)

In [12]:
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

def eval_metrics(actual, pred):
    """Score predictions against the true target values.

    Args:
        actual: Ground-truth target values.
        pred: Predicted values, aligned element-wise with ``actual``.

    Returns:
        A ``(rmse, mae, r2)`` tuple: the square root of the mean squared
        error, the mean absolute error, and the R^2 score.
    """
    return (
        np.sqrt(mean_squared_error(actual, pred)),
        mean_absolute_error(actual, pred),
        r2_score(actual, pred),
    )

In [13]:
import os

import mlflow

# Tracking server to log runs to. The MLFLOW_TRACKING_URI environment variable
# takes precedence when set, so the notebook can be pointed at another server
# without editing this cell; otherwise the original hard-coded URI is used.
mlflow.set_tracking_uri(
    os.environ.get('MLFLOW_TRACKING_URI', 'http://0.0.0.0:5000')
)  # set to your server URI
# Select the experiment under which the runs below are recorded.
mlflow.set_experiment('/wine')

In [17]:
from sklearn.linear_model import ElasticNet
import mlflow.sklearn

# Seed NumPy's global random number generator.
# NOTE(review): this cell runs after the train/test split cell, so the seed
# does not influence that split -- confirm whether it was meant to precede it.
np.random.seed(40)

def train(alpha=0.5, l1_ratio=0.5):
    """Fit an ElasticNet model and record the run in MLflow.

    The model is fitted on the module-level ``train_x`` / ``train_y`` and
    scored on ``test_x`` / ``test_y`` with ``eval_metrics``. Inside a single
    MLflow run it prints the scores, then logs the estimator's parameters,
    the ``rmse`` / ``r2`` / ``mae`` metrics and the fitted model (under the
    estimator's class name).

    Args:
        alpha: Passed to ``ElasticNet`` as ``alpha``.
        l1_ratio: Passed to ``ElasticNet`` as ``l1_ratio``.

    Returns:
        None.
    """
    with mlflow.start_run():
        regressor = ElasticNet(alpha=alpha, l1_ratio=l1_ratio, random_state=42)
        regressor.fit(train_x, train_y)
        rmse, mae, r2 = eval_metrics(test_y, regressor.predict(test_x))

        model_name = type(regressor).__name__
        report_lines = (
            '{} (alpha={}, l1_ratio={}):'.format(model_name, alpha, l1_ratio),
            ' RMSE: %s' % rmse,
            ' MAE: %s' % mae,
            ' R2: %s' % r2,
        )
        for report_line in report_lines:
            print(report_line)

        # get_params() already returns a plain dict, so it is logged as-is.
        mlflow.log_params(regressor.get_params())
        for metric_name, metric_value in (('rmse', rmse), ('r2', r2), ('mae', mae)):
            mlflow.log_metric(metric_name, metric_value)
        mlflow.sklearn.log_model(regressor, model_name)

In [18]:
# First run: both hyperparameters at 0.5, the same values as train()'s defaults.
train(alpha=0.5, l1_ratio=0.5)

ElasticNet (alpha=0.5, l1_ratio=0.5):
  RMSE: 0.7325693777577805
  MAE: 0.5895721434715478
  R2: 0.12163690293641838


In [19]:
# Lower alpha to 0.1 while keeping l1_ratio at 0.5.
train(alpha=0.1, l1_ratio=0.5)

ElasticNet (alpha=0.1, l1_ratio=0.5):
  RMSE: 0.6832521710295818
  MAE: 0.5350826216023779
  R2: 0.23592040719074103


In [20]:
# Raise alpha to 0.8 while keeping l1_ratio at 0.5.
train(alpha=0.8, l1_ratio=0.5)

ElasticNet (alpha=0.8, l1_ratio=0.5):
  RMSE: 0.7713038517785624
  MAE: 0.6344212065633348
  R2: 0.026294640912563283


In [21]:
# alpha at 0.1 with l1_ratio lowered to 0.2.
train(alpha=0.1, l1_ratio=0.2)

ElasticNet (alpha=0.1, l1_ratio=0.2):
  RMSE: 0.6740753299699419
  MAE: 0.5276949437873688
  R2: 0.25630745861273185


In [22]:
# alpha at 0.1 with l1_ratio at 0.3.
train(alpha=0.1, l1_ratio=0.3)

ElasticNet (alpha=0.1, l1_ratio=0.3):
  RMSE: 0.6781545799635063
  MAE: 0.5308991080094628
  R2: 0.2472791287278865


In [23]:
# Both alpha and l1_ratio at 0.2.
train(alpha=0.2, l1_ratio=0.2)

ElasticNet (alpha=0.2, l1_ratio=0.2):
  RMSE: 0.6844856006568806
  MAE: 0.5375117838920673
  R2: 0.2331592331820277


In [24]:
# Both alpha and l1_ratio at 0.1.
train(alpha=0.1, l1_ratio=0.1)

ElasticNet (alpha=0.1, l1_ratio=0.1):
  RMSE: 0.6690250543541869
  MAE: 0.5236546308642179
  R2: 0.2674094302489908


In [25]:
# Both alpha and l1_ratio at 0.05, the smallest values tried in this notebook.
train(alpha=0.05, l1_ratio=0.05)

ElasticNet (alpha=0.05, l1_ratio=0.05):
  RMSE: 0.6586566565045078
  MAE: 0.5152309297379374
  R2: 0.28994051940308996
