In [None]:
import os
import sys
# Make modules located under /mnt/code/ importable from this notebook.
sys.path.append("/mnt/code/")

In [None]:
import os
import tempfile
import time
import ray
from ray import air, tune
from ray.air import session
import mlflow

import jwt
import json
import warnings
import sys
import pandas as pd
import numpy as np
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.model_selection import train_test_split
from sklearn.linear_model import ElasticNet
from urllib.parse import urlparse
import mlflow
import mlflow.sklearn
import logging
# Configure logging at WARNING level and create the module logger that
# train_function uses to report dataset download failures.
logging.basicConfig(level=logging.WARN)
logger = logging.getLogger(__name__)

In [None]:
def eval_metrics(actual, pred):
    """Score predictions against ground truth.

    Args:
        actual: True target values.
        pred: Predicted target values.

    Returns:
        A ``(rmse, mae, r2)`` tuple: root mean squared error, mean absolute
        error and the R^2 coefficient of determination.
    """
    mean_sq_err = mean_squared_error(actual, pred)
    return (
        np.sqrt(mean_sq_err),
        mean_absolute_error(actual, pred),
        r2_score(actual, pred),
    )

def train_function(config):
    """Train and score one ElasticNet model on the red-wine-quality CSV.

    Intended to be passed to ``tune.Tuner`` as the trainable: it downloads the
    dataset, splits it into train and test sets, fits an ``ElasticNet`` with
    the hyperparameters in ``config``, prints the test metrics and reports
    them through ``session.report``.

    Args:
        config: Mapping that must contain the keys ``"alpha"`` and
            ``"l1_ratio"``; both are forwarded to ``ElasticNet``.

    Raises:
        Exception: Whatever ``pd.read_csv`` raised when the dataset could not
            be loaded. The error is logged first and then re-raised.
    """
    csv_url = (
        "http://archive.ics.uci.edu/ml/machine-learning-databases/wine-quality/winequality-red.csv"
    )
    try:
        data = pd.read_csv(csv_url, sep=";")
    except Exception as e:
        logger.exception(
            "Unable to download training & test CSV, check your internet connection. Error: %s", e
        )
        # Nothing below can run without the data. Re-raise so the trial fails
        # with the real cause instead of a NameError on `data`.
        raise

    # Split the data into training and test sets. (0.75, 0.25) split.
    # The seed is fixed so every trial is scored on the same held-out rows,
    # which keeps the metrics comparable across hyperparameter settings.
    train, test = train_test_split(data, random_state=42)

    # The predicted column is "quality" which is a scalar from [3, 9]
    train_x = train.drop(["quality"], axis=1)
    test_x = test.drop(["quality"], axis=1)
    train_y = train[["quality"]]
    test_y = test[["quality"]]

    alpha = config['alpha']
    l1_ratio = config['l1_ratio']
    lr = ElasticNet(alpha=alpha, l1_ratio=l1_ratio, random_state=42)
    lr.fit(train_x, train_y)
    predicted_qualities = lr.predict(test_x)

    (rmse, mae, r2) = eval_metrics(test_y, predicted_qualities)

    print("Elasticnet model (alpha=%f, l1_ratio=%f):" % (alpha, l1_ratio))
    print("  RMSE: %s" % rmse)
    print("  MAE: %s" % mae)
    print("  R2: %s" % r2)

    # Feed the score back to Tune.
    session.report({"rmse": rmse, "mae": mae, "r-squared": r2})
 

In [None]:

        
def tune_with_callback(mlflow_tracking_uri, experiment_name, finish_fast=False):
    """Grid-search ElasticNet hyperparameters with Ray Tune, logging to MLflow.

    A parent MLflow run named "GridSearchTrials" is opened in
    ``experiment_name``; its run id is handed to the logger callback as the
    ``mlflow.parentRunId`` tag. The parent run is closed when tuning ends,
    whether it succeeded or raised.

    Args:
        mlflow_tracking_uri: Tracking URI forwarded to the logger callback.
        experiment_name: MLflow experiment used for the parent run and passed
            to the logger callback.
        finish_fast: Currently unused; kept so existing callers keep working.
    """
    print(f"Experiment Name {experiment_name}")
    mlflow.set_experiment(experiment_name)
    if mlflow.active_run():
        # End any run left active so the parent run below starts cleanly.
        mlflow.end_run()

    # Explicit grids instead of np.arange with a float step: floating-point
    # rounding made np.arange(0.5, 0.8, 0.1) also return 0.8 and produced
    # values such as 0.30000000000000004.
    alpha_values = [0.1, 0.2, 0.3, 0.4]
    l1_ratio_values = [0.5, 0.6, 0.7]

    # The context manager ends the parent run even if tuning raises.
    with mlflow.start_run(run_name="GridSearchTrials") as parent_run:
        # NOTE(review): MyMLflowLoggerCallback is neither defined nor imported
        # in the visible cells - confirm it is imported (e.g. from a module
        # under /mnt/code/) before this function is called.
        cb = MyMLflowLoggerCallback(
            tracking_uri=mlflow_tracking_uri,
            experiment_name=experiment_name,
            save_artifact=True,
            tags={"mlflow.parentRunId": parent_run.info.run_id},
        )

        # One trial per (alpha, l1_ratio) combination.
        # To sample a fixed number of random configurations instead, add
        # tune_config=tune.TuneConfig(num_samples=15) and replace
        # tune.grid_search with tune.choice in param_space.
        tuner = tune.Tuner(
            train_function,
            run_config=air.RunConfig(
                name="mlflow",
                callbacks=[cb],
            ),
            param_space={
                "alpha": tune.grid_search(alpha_values),
                "l1_ratio": tune.grid_search(l1_ratio_values),
            },
        )
        tuner.fit()


In [None]:
temp_dir = "/tmp"

# Connect to the Ray head node only when this kernel is not connected yet,
# so re-running the cell does not call ray.init a second time.
if not ray.is_initialized():
    service_host = os.environ["RAY_HEAD_SERVICE_HOST"]
    service_port = os.environ["RAY_HEAD_SERVICE_PORT"]
    address = "ray://" + service_host + ":" + service_port
    ray.init(address=address)

# Experiment name has the form RAY_HYPERPARAMETER_TUNING-<user>-<project>.
experiment_name = "-".join(
    [
        "RAY_HYPERPARAMETER_TUNING",
        os.environ["DOMINO_STARTING_USERNAME"],
        os.environ["DOMINO_PROJECT_NAME"],
    ]
)

# Run the search, then query MLflow for the runs of that experiment.
tune_with_callback(
    os.environ["CLUSTER_MLFLOW_TRACKING_URI"],
    experiment_name,
    finish_fast=True,
)
df = mlflow.search_runs(
    [mlflow.get_experiment_by_name(experiment_name).experiment_id]
)


In [None]:
# Print the result of the mlflow.search_runs query stored in df.
print(df)

In [None]:
#!jupyter nbconvert --ClearOutputPreprocessor.enabled=True --inplace *.ipynb