In [2]:
from __future__ import print_function
import mlflow
# Select "databricks" as the MLflow tracking URI, then echo the active URI as
# the cell's output so the reader can confirm the setting took effect.
mlflow.set_tracking_uri("databricks")
mlflow.get_tracking_uri()

'databricks'

In [3]:
import time
def now():
    """Return the current UTC time formatted as ``YYYY-MM-DD_HH:MM:SS``.

    0.5 is added to the epoch time before truncating to an int, so the
    timestamp is rounded to the nearest whole second rather than floored.
    """
    rounded_epoch = int(time.time() + .5)
    return time.strftime("%Y-%m-%d_%H:%M:%S", time.gmtime(rounded_epoch))

In [4]:
# Stamp the notebook output with the time this session started.
print("Run Start:", now())

Run Start: 2020-06-08_07:34:10


In [5]:
# Free-form label for where runs are launched from.
# NOTE(review): not referenced by any visible cell — confirm it is still needed.
run_origin = "jupyter-lab"

# MLflow experiment to record runs under; fill in both placeholders before
# running the cells below.
experiment_name = "/Users/<your_username>/<your_experiment_name>"

In [6]:
print("MLflow Version:", mlflow.__version__)

# Make `experiment_name` the active experiment for subsequent runs, then look
# the same experiment up through a client to report its numeric id.
mlflow.set_experiment(experiment_name)
mlflow_client = mlflow.tracking.MlflowClient()
experiment_id = (
    mlflow_client
    .get_experiment_by_name(experiment_name)
    .experiment_id
)
print("experiment_id:", experiment_id)

MLflow Version: 1.8.0
experiment_id: 2974434


In [7]:
import os
import warnings
import sys

import pandas as pd
import numpy as np
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.model_selection import train_test_split
from sklearn.linear_model import ElasticNet
from urllib.parse import urlparse
import mlflow
import mlflow.sklearn
from sklearn.metrics import roc_curve
from sklearn.metrics import roc_auc_score

In [8]:
import logging 
# Root logger emits WARNING and above; `logger` is this module's own logger.
logging.basicConfig(level=logging.WARNING)
logger = logging.getLogger(__name__)

In [9]:
def eval_metrics(actual, pred):
  """Score predictions against the observed values.

  Args:
    actual: Observed target values.
    pred: Predicted target values, aligned with ``actual``.

  Returns:
    A ``(rmse, mae, r2)`` tuple: root of the mean squared error, mean
    absolute error and R^2 score, in that order.
  """
  mse = mean_squared_error(actual, pred)
  return np.sqrt(mse), mean_absolute_error(actual, pred), r2_score(actual, pred)

In [10]:
def train(alpha, l1_ratio):
  """Fit an ElasticNet on the red-wine quality data and record the run in MLflow.

  Downloads the CSV, splits it into train and test partitions, fits the
  model on every column except ``quality``, prints RMSE / MAE / R2 measured
  on the test partition, and logs the two hyper-parameters and the three
  metrics inside a new MLflow run.

  Args:
    alpha: Value for ElasticNet's ``alpha``; anything ``float()`` accepts.
      ``None`` selects the default of 0.5.
    l1_ratio: Value for ElasticNet's ``l1_ratio``; anything ``float()``
      accepts. ``None`` selects the default of 0.5.

  Raises:
    Exception: Whatever ``pd.read_csv`` raised when the data set could not be
      loaded; the error is logged and then re-raised unchanged.
  """
  # Note: this silences warnings for the rest of the session, not just this call.
  warnings.filterwarnings("ignore")
  # train_test_split is called without random_state below, so the global NumPy
  # seed is what keeps the split identical from run to run.
  np.random.seed(40)

  csv_url = 'http://archive.ics.uci.edu/ml/machine-learning-databases/wine-quality/winequality-red.csv'

  try:
    data = pd.read_csv(csv_url, sep=';')
  except Exception as e:
    logger.exception("Unable to download training & test CSV, check your internet connection. Error: %s", e)
    # Without the data nothing below can run; propagate the real error instead
    # of failing later with a NameError on `data`.
    raise

  train_df, test_df = train_test_split(data)

  # "quality" is the prediction target; every other column is a feature.
  train_x = train_df.drop(["quality"], axis=1)
  test_x = test_df.drop(["quality"], axis=1)
  train_y = train_df[["quality"]]
  test_y = test_df[["quality"]]

  # Fall back to 0.5 when a hyper-parameter is not supplied. The None check
  # must come before float(): float(None) raises TypeError.
  alpha = 0.5 if alpha is None else float(alpha)
  l1_ratio = 0.5 if l1_ratio is None else float(l1_ratio)

  with mlflow.start_run():
    lr = ElasticNet(alpha=alpha, l1_ratio=l1_ratio, random_state=42)
    lr.fit(train_x, train_y)

    predicted_qualities = lr.predict(test_x)

    (rmse, mae, r2) = eval_metrics(test_y, predicted_qualities)

    print("ElasticNet model (alpha=%f, l1_ratio=%f):" %(alpha, l1_ratio))
    print("  RMSE: %s" %rmse)
    print("  MAE: %s" %mae)
    print("  R2: %s" %r2)

    mlflow.log_param("alpha", alpha)
    mlflow.log_param("l1_ratio", l1_ratio)

    mlflow.log_metric("rmse", rmse)
    mlflow.log_metric("mae", mae)
    mlflow.log_metric("r2", r2)
    
    

    



In [11]:
train(alpha=0.2, l1_ratio=0.2)

ElasticNet model (alpha=0.200000, l1_ratio=0.200000):
  RMSE: 0.7336400911821402
  MAE: 0.5643841279275428
  R2: 0.2373946606358417


In [12]:
train(alpha=0.4, l1_ratio=0.4)

ElasticNet model (alpha=0.400000, l1_ratio=0.400000):
  RMSE: 0.764461958746835
  MAE: 0.5966303605775051
  R2: 0.1719711149147427


In [13]:
train(alpha=0.5, l1_ratio=0.5)

ElasticNet model (alpha=0.500000, l1_ratio=0.500000):
  RMSE: 0.793164022927685
  MAE: 0.6271946374319586
  R2: 0.10862644997792625


In [14]:
train(alpha=0.9, l1_ratio=0.7)

ElasticNet model (alpha=0.900000, l1_ratio=0.700000):
  RMSE: 0.8331069437643933
  MAE: 0.669742489026651
  R2: 0.016588601539516357
