# MLflow Train Wine Quality Notebook
This is a Quick Start notebook.
* It is based on [train.py](https://github.com/databricks/mlflow/blob/master/example/tutorial/train.py) from [MLflow's tutorial](https://mlflow.org/docs/latest/tutorial.html). 
* It creates runs in the experiment "py/sk/ElasticNet/WineQuality".


In [36]:
from __future__ import print_function
import mlflow

# Point the MLflow client at the tracking server, then echo the configured
# URI back as the cell output so the reader can confirm it.
tracking_uri = "http://localhost:5000"
mlflow.set_tracking_uri(tracking_uri)
mlflow.tracking.get_tracking_uri()

'http://localhost:5000'

In [37]:
def now():
    """Return the current UTC time formatted as ``YYYY-MM-DD_HH:MM:SS``.

    The epoch time is rounded to the nearest whole second (not truncated)
    before formatting.

    Returns:
        str: The formatted UTC timestamp.
    """
    # Imported here because no visible cell of this notebook imports ``time``;
    # without it a fresh kernel raises NameError on the first call.
    import time

    # Adding 0.5 before int() rounds to the nearest second.
    epoch_seconds = int(time.time() + .5)
    return time.strftime("%Y-%m-%d_%H:%M:%S", time.gmtime(epoch_seconds))

In [38]:
# Record when this notebook run started (timestamp string produced by now()).
run_start = now()
print("Run Start:", run_start)

Run Start: 2019-03-26_02:10:09


In [39]:
# --- Notebook configuration -------------------------------------------------

# MLflow experiment that the runs created by this notebook are logged under.
experiment_name = "py/sk/ElasticNet/WineQuality"

# Value logged as the "run_origin" parameter on every run.
run_origin = "jupyter"

# Remote source of the wine-quality CSV and the local path it is cached at.
wine_data_url = "https://raw.githubusercontent.com/mlflow/mlflow/master/examples/sklearn_elasticnet_wine/wine-quality.csv"
wine_data_path = "./data/wine-quality-white.csv"

In [40]:
import mlflow
print("MLflow Version:", mlflow.version.VERSION)

# Activate the configured experiment, then look its id up through a tracking
# client so it can be reported alongside the name.
mlflow.set_experiment(experiment_name)
mlflow_client = mlflow.tracking.MlflowClient()
experiment = mlflow_client.get_experiment_by_name(experiment_name)
experiment_id = experiment.experiment_id

print("experiment_id:", experiment_id)
print("experiment_name:", experiment_name)

MLflow Version: 0.8.2
experiment_id: 5
experiment_name: py/sk/ElasticNet/WineQuality


In [41]:
import os
import requests

# Download the wine-quality CSV once and cache it at wine_data_path; later
# runs reuse the local copy instead of hitting the network again.
if not os.path.exists(wine_data_path):
    print("Downloading {} to {}".format(wine_data_url,wine_data_path))

    # open() does not create missing parent directories, so make sure the
    # target directory (e.g. ./data) exists before writing.
    data_dir = os.path.dirname(wine_data_path)
    if data_dir and not os.path.isdir(data_dir):
        os.makedirs(data_dir)

    rsp = requests.get(wine_data_url, timeout=60)
    # Fail loudly on an HTTP error. Otherwise the error page would be saved as
    # the CSV and, because of the exists() guard above, never re-downloaded.
    rsp.raise_for_status()

    # Write the raw bytes so the cached file matches the server's copy exactly,
    # independent of the platform's default encoding and newline translation.
    with open(wine_data_path, 'wb') as f:
        f.write(rsp.content)

#### Write your ML code based on the `train.py` code
This tutorial is based on MLflow's example [train.py](https://github.com/databricks/mlflow/blob/master/example/tutorial/train.py), which uses an external [wine-quality.csv](https://github.com/databricks/mlflow/blob/master/example/tutorial/wine-quality.csv) dataset to predict wine quality.

In [42]:
# The data set used in this example is from http://archive.ics.uci.edu/ml/datasets/Wine+Quality
# P. Cortez, A. Cerdeira, F. Almeida, T. Matos and J. Reis.
# Modeling wine preferences by data mining from physicochemical properties. In Decision Support Systems, Elsevier, 47(4):547-553, 2009.

import os
import warnings
import sys

import pandas as pd
import numpy as np
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.model_selection import train_test_split
from sklearn.linear_model import ElasticNet

import mlflow
import mlflow.sklearn

def eval_metrics(actual, pred):
    """Score predictions against the ground-truth values.

    Args:
        actual: The true target values.
        pred: The predicted target values.

    Returns:
        tuple: ``(rmse, mae, r2)`` -- the root of the mean squared error,
        the mean absolute error, and the R2 score, in that order.
    """
    mse = mean_squared_error(actual, pred)
    return np.sqrt(mse), mean_absolute_error(actual, pred), r2_score(actual, pred)

In [43]:
from sklearn.linear_model import enet_path
import matplotlib.pyplot as plt
from itertools import cycle

def plot_enet_descent_path(X, y, l1_ratio, plot_file):
    """Plot the elastic-net coefficient paths and save the figure to a file.

    Computes the regularization path with ``enet_path`` and draws one dashed
    line per entry of the returned coefficient array against ``-log10(alpha)``.

    Args:
        X: Feature data passed straight to ``enet_path``.
        y: Target data passed straight to ``enet_path``.
        l1_ratio: Elastic-net mixing parameter; also shown in the plot title.
        plot_file: Path the figure is saved to.

    Returns:
        The matplotlib figure. It is closed before returning and is also
        stored in the module-level ``image`` variable.
    """
    # The finished figure is published through the global ``image`` name.
    global image

    # The smaller eps is, the longer the computed path.
    path_eps = 5e-3

    print("Computing regularization path using the elastic net.")
    alphas, coef_paths, _ = enet_path(X, y, eps=path_eps, l1_ratio=l1_ratio, fit_intercept=False)
    neg_log_alphas = -np.log10(alphas)

    # Draw on figure number 1 and its current axes.
    fig = plt.figure(1)
    ax = plt.gca()

    # Cycle through a fixed palette, one colour per coefficient path.
    palette = cycle(['b', 'r', 'g', 'c', 'k'])
    for coef_path, color in zip(coef_paths, palette):
        ax.plot(neg_log_alphas, coef_path, linestyle='--', c=color)

    ax.set_xlabel('-Log(alpha)')
    ax.set_ylabel('coefficients')
    ax.set_title('ElasticNet Path by alpha for l1_ratio = ' + str(l1_ratio))
    ax.axis('tight')

    image = fig
    fig.savefig(plot_file)
    plt.close(fig)
    return image

In [44]:
def train(alpha, l1_ratio):
    """Train an ElasticNet model on the wine data and log the run to MLflow.

    Reads the CSV at the module-level ``wine_data_path``, splits it into
    training and test sets, fits the model inside a new MLflow run, and logs
    the parameters, metrics, fitted model and the regularization-path plot.

    Args:
        alpha: ``alpha`` passed to ``ElasticNet``.
        l1_ratio: ``l1_ratio`` passed to ``ElasticNet`` and to the path plot.

    Returns:
        tuple: ``(rmse, r2, mae)``. Note that this order differs from the
        ``(rmse, mae, r2)`` order returned by ``eval_metrics``.
    """
    warnings.filterwarnings("ignore")
    # Seed NumPy's global RNG; train_test_split below is called without an
    # explicit random_state.
    np.random.seed(40)

    wine_df = pd.read_csv(wine_data_path)

    # Split the data into training and test sets. (0.75, 0.25) split.
    train_df, test_df = train_test_split(wine_df)

    # The predicted column is "quality" which is a scalar from [3, 9]
    target = ["quality"]
    train_x, train_y = train_df.drop(target, axis=1), train_df[target]
    test_x, test_y = test_df.drop(target, axis=1), test_df[target]

    with mlflow.start_run() as run:
        run_id = run.info.run_uuid
        print("run_id:", run_id)
        print("run_origin:", run_origin)

        model = ElasticNet(alpha=alpha, l1_ratio=l1_ratio, random_state=42)
        model.fit(train_x, train_y)

        rmse, mae, r2 = eval_metrics(test_y, model.predict(test_x))

        print("Elasticnet model (alpha={}, l1_ratio={}):".format(alpha, l1_ratio))
        print("  RMSE:", rmse)
        print("  MAE:", mae)
        print("  R2:", r2)

        for key, value in (("alpha", alpha), ("l1_ratio", l1_ratio), ("run_origin", run_origin)):
            mlflow.log_param(key, value)
        for key, value in (("rmse", rmse), ("r2", r2), ("mae", mae)):
            mlflow.log_metric(key, value)

        mlflow.sklearn.log_model(model, "model")

        # The path plot is computed on the full dataset, not just the
        # training split.
        features = wine_df.drop(target, axis=1).values
        labels = wine_df[target].values.ravel()
        plot_file = "wine_quality.png"
        plot_enet_descent_path(features, labels, l1_ratio, plot_file)
        mlflow.log_artifact(plot_file)

        return (rmse, r2, mae)

In [45]:
# Run one training pass; the returned (rmse, r2, mae) tuple is the cell output.
train(alpha=0.1, l1_ratio=0.1)

run_id: d8b189ad970e44f4978c5b19bbf6fa1e
run_origin: jupyter
Elasticnet model (alpha=0.1, l1_ratio=0.1):
  RMSE: 0.7792546522251949
  MAE: 0.6112547988118587
  R2: 0.2157063843066196
Computing regularization path using the elastic net.


(0.7792546522251949, 0.2157063843066196, 0.6112547988118587)

In [46]:
# Record when this notebook run finished (timestamp string produced by now()).
run_end = now()
print("Run End:", run_end)

Run End: 2019-03-26_02:10:10
