# Hello World MLFlow on CVM

#### Install Dependencies

Before running this notebook:
* You may want to change your default browser from IE to Edge
* Install the dependencies in your Python environment: `pip install azure-ai-ml azureml-mlflow mlflow pandas scikit-learn`
* Install Azure CLI and run `az login`

#### Import dependencies

In [None]:
import os
import warnings
import sys

import pandas as pd
import numpy as np
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.model_selection import train_test_split
from sklearn.linear_model import ElasticNet

import mlflow
import mlflow.sklearn
from mlflow.tracking.client import MlflowClient

import logging

from azure.identity import DefaultAzureCredential
from azure.identity import AzureCliCredential
from azure.ai.ml import automl, Input, MLClient

#### Link AML Workspace

In [None]:
# Azure ML workspace coordinates. Replace each placeholder with your own value
# before running the cells that follow.
subscription_id, resource_group, workspace_name = (
    "<your subscription id here>",
    "<your resource group here>",
    "<your workspace here>",
)

In [None]:
# Credential used to authenticate against Azure for the workspace client.
credential = DefaultAzureCredential()

# Client bound to the workspace identified by the settings defined earlier.
ml_client = MLClient(
    credential=credential,
    subscription_id=subscription_id,
    resource_group_name=resource_group,
    workspace_name=workspace_name,
)

# Look up the workspace's MLflow tracking URI and point MLflow at it.
mlflow_tracking_uri = ml_client.workspaces.get(
    name=ml_client.workspace_name
).mlflow_tracking_uri
mlflow.set_tracking_uri(mlflow_tracking_uri)

# Select the experiment that runs from this notebook are recorded under.
mlflow.set_experiment('cvm-mlflow-hello-world')

#### Train Model, Uploading Metrics and Model to AML Workspace

In [None]:
# Wine Quality Sample
# Raw ElasticNet hyperparameters for this run; edit these to try other settings.
in_alpha, in_l1_ratio = 0.1, 0.1


# Module logger; the root logger is configured to emit warnings and above.
logger = logging.getLogger(__name__)
logging.basicConfig(level=logging.WARNING)

def eval_metrics(actual, pred):
    """Compute regression metrics for a set of predictions.

    Args:
        actual: Ground-truth target values.
        pred: Predicted values to compare against ``actual``.

    Returns:
        A ``(rmse, mae, r2)`` tuple: root mean squared error, mean absolute
        error and R^2 score, in that order.
    """
    return (
        np.sqrt(mean_squared_error(actual, pred)),
        mean_absolute_error(actual, pred),
        r2_score(actual, pred),
    )


# Silence library warnings to keep the cell output readable, and seed NumPy's
# global random state so the random steps that follow are repeatable.
warnings.filterwarnings("ignore")
np.random.seed(40)

# Read the wine-quality csv file from the URL
csv_url =\
    'http://archive.ics.uci.edu/ml/machine-learning-databases/wine-quality/winequality-red.csv'
try:
    data = pd.read_csv(csv_url, sep=';')
except Exception as e:
    logger.exception(
        "Unable to download training & test CSV, check your internet connection. Error: %s", e)
    # Re-raise after logging: without `data` nothing downstream can run, and
    # continuing would only surface later as a misleading NameError.
    raise

# Hold out a test set using train_test_split's default proportions
# (0.75 train, 0.25 test).
train, test = train_test_split(data)

# "quality" (a scalar from [3, 9]) is the column to predict; every other
# column is used as a feature.
train_x, test_x = (frame.drop(columns=["quality"]) for frame in (train, test))
train_y, test_y = (frame[["quality"]] for frame in (train, test))

# Set default values if no alpha is provided.
# The None check has to happen before the float() conversion: float(None)
# raises TypeError, and the result of float() is never None, so checking the
# converted value could never select the default.
alpha = 0.5 if in_alpha is None else float(in_alpha)

# Set default values if no l1_ratio is provided (same reasoning as above).
l1_ratio = 0.5 if in_l1_ratio is None else float(in_l1_ratio)
    
# Useful for multiple runs (only doing one run in this sample notebook)
with mlflow.start_run() as run:

    # Fit ElasticNet on the training split
    lr = ElasticNet(alpha=alpha, l1_ratio=l1_ratio, random_state=42)
    lr.fit(train_x, train_y)

    # Score the held-out split
    predicted_qualities = lr.predict(test_x)
    rmse, mae, r2 = eval_metrics(test_y, predicted_qualities)

    # Echo the hyperparameters and metrics in the cell output
    print("Elasticnet model (alpha=%f, l1_ratio=%f):" % (alpha, l1_ratio))
    print("  RMSE: %s" % rmse)
    print("  MAE: %s" % mae)
    print("  R2: %s" % r2)

    # Record hyperparameters, then metrics, then the fitted model on the run
    for param_name, param_value in (("alpha", alpha), ("l1_ratio", l1_ratio)):
        mlflow.log_param(param_name, param_value)
    for metric_name, metric_value in (("rmse", rmse), ("r2", r2), ("mae", mae)):
        mlflow.log_metric(metric_name, metric_value)

    mlflow.sklearn.log_model(lr, "model")

#### You should now be able to see your run, metrics, and model in the AML Workspace Studio UI!

#### Download Model from AML

In [None]:
# ID of the run created by the training cell's `mlflow.start_run()` block.
run_id = run.info.run_id

# Initialize MLFlow client
mlflow_client = MlflowClient()

# Create the local destination folder, named after the run.
# os.makedirs(..., exist_ok=True) replaces the separate exists()/mkdir() pair:
# re-running the cell is safe, and the directory created is exactly the path
# passed to download_artifacts below.
local_dir = f"./{run_id}"
os.makedirs(local_dir, exist_ok=True)

# Download the run's "model" artifact directory into local_dir.
downloaded_path = mlflow_client.download_artifacts(
    run_id, "model", local_dir
)

print(downloaded_path)

#### You can now start an MLflow model server and run inference

Start the server in one terminal, replacing `{downloaded_path}` with the path printed by the previous cell:
```mlflow models serve -m {downloaded_path} -p 1234```

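Send an inference request from another terminal. Note that the `format=pandas-split` request shown here is the MLflow 1.x scoring format; MLflow 2.x and later expect `Content-Type: application/json` with the same columns/data object wrapped in a top-level `dataframe_split` key: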
`curl -X POST -H "Content-Type:application/json; format=pandas-split" --data "{\"columns\":[\"alcohol\", \"chlorides\", \"citric acid\", \"density\", \"fixed acidity\", \"free sulfur dioxide\", \"pH\", \"residual sugar\", \"sulphates\", \"total sulfur dioxide\", \"volatile acidity\"],\"data\":[[12.8, 0.029, 0.48, 0.98, 6.2, 29, 3.33, 1.2, 0.39, 75, 0.66]]}" http://127.0.0.1:1234/invocations`