In [20]:
import pandas as pd 
import logging
import pickle 
import mlflow 
import mlflow.sklearn 
from mlflow.models.signature import infer_signature
from sklearn.model_selection import train_test_split
from sklearn.linear_model import ElasticNet 
from dotenv import load_dotenv
from pprint import pprint
from mlflow.tracking import MlflowClient
import warnings
import numpy as np
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from mlflow.models.signature import infer_signature

import logging

logging.basicConfig(level=logging.WARN)
logger = logging.getLogger(__name__)
client = MlflowClient()


def get_data():
    # Read the wine-quality csv file from the URL
    csv_url = (
        "http://archive.ics.uci.edu/ml/machine-learning-databases/wine-quality/winequality-red.csv"
    )
    try:
        df = pd.read_csv(csv_url, sep=";")
    except Exception as e:
        logger.exception(
            "Unable to download training & test CSV, check your internet connection. Error: %s", e
        )
    return df 


df = get_data()


In [2]:
import os
print("Tracking server uri before loading env vars:")
print(mlflow.tracking.get_tracking_uri())
print("\n")
load_dotenv()
print("Tracking server uri after loading env vars:")
print(mlflow.tracking.get_tracking_uri())
DEFAULT_ARTIFACTS_URI = os.environ['MLFLOW_TRACKING_URI']

Tracking server uri before loading env vars:
file:///home/akinwilson/Projects/mlops/model/mlruns


Tracking server uri after loading env vars:
http://mlops-alb-development-888296804.eu-west-2.elb.amazonaws.com/mlops/


In [3]:
from mlflow.store.artifact.mlflow_artifacts_repo import MlflowArtifactsRepository
print(f"Current artifacts associated to {DEFAULT_ARTIFACTS_URI}:")
MlflowArtifactsRepository(DEFAULT_ARTIFACTS_URI).list_artifacts()

Current artifacts associated to http://mlops-alb-development-888296804.eu-west-2.elb.amazonaws.com/mlops/:


[]

### Defining sampling model training routine with mlflow logging

- parameter tracking
- metric tracking
- model logging 

In [52]:
def eval_metrics(actual, pred):
    rmse = np.sqrt(mean_squared_error(actual, pred))
    mae = mean_absolute_error(actual, pred)
    r2 = r2_score(actual, pred)
    return rmse, mae, r2



def get_model_signature(x, y_hat):
    '''inputs are expected to be a dataframe and the prediction of the model'''
    return infer_signature(x, y_hat)


def training():
    warnings.filterwarnings("ignore")
    np.random.seed(40)

    # Read the wine-quality csv file from the URL
    csv_url = (
        "http://archive.ics.uci.edu/ml/machine-learning-databases/wine-quality/winequality-red.csv"
    )
    try:
        data = pd.read_csv(csv_url, sep=";")
    except Exception as e:
        logger.exception(
            "Unable to download training & test CSV, check your internet connection. Error: %s", e
        )
    # Split the data into training and test sets. (0.75, 0.25) split.
    train, test = train_test_split(data)
    # The predicted column is "quality" which is a scalar from [3, 9]
    train_x = train.drop(["quality"], axis=1)
    test_x = test.drop(["quality"], axis=1)
    train_y = train[["quality"]]
    test_y = test[["quality"]]

    alpha =  0.5
    l1_ratio = 0.5

    exp_id = mlflow.create_experiment("testing-V13")
    print(mlflow.get_experiment(exp_id))
    
    mlflow.start_run(experiment_id=exp_id)
    
    lr = ElasticNet(alpha=alpha, l1_ratio=l1_ratio, random_state=42)
    lr.fit(train_x, train_y)
    predicted_qualities = lr.predict(test_x)
    (rmse, mae, r2) = eval_metrics(test_y, predicted_qualities)
    print(f"Elasticnet model (alpha={alpha}, l1_ratio={l1_ratio}):")
    print(f"  RMSE: {rmse}")
    print(f"  MAE: {mae}")
    print(f"  R2: {r2}")
    # param logging
    mlflow.log_param("alpha", alpha)
    mlflow.log_param("l1_ratio", l1_ratio)
    # metric logging
    mlflow.log_metric("rmse", rmse)
    mlflow.log_metric("r2", r2)
    mlflow.log_metric("mae", mae)
    # model logging
    model_sig = get_model_signature(test_x, predicted_qualities)
    mlflow.sklearn.log_model(sk_model=lr,
                            artifact_path="model",
                            registered_model_name="ElasticnetWineModel",
                            signature=model_sig)
    mlflow.end_run()

In [53]:
training()
# mlflow.end_run()

<Experiment: artifact_location='mlflow-artifacts:/13', experiment_id='13', lifecycle_stage='active', name='testing-V13', tags={}>
Elasticnet model (alpha=0.5, l1_ratio=0.5):
  RMSE: 0.7931640229276851
  MAE: 0.6271946374319586
  R2: 0.10862644997792614


Registered model 'ElasticnetWineModel' already exists. Creating a new version of this model...
2022/04/20 13:50:19 INFO mlflow.tracking._model_registry.client: Waiting up to 300 seconds for model version to finish creation.                     Model name: ElasticnetWineModel, version 5
Created version '5' of model 'ElasticnetWineModel'.


In [32]:
!cat .env

MLFLOW_TRACKING_URI=http://mlops-alb-development-888296804.eu-west-2.elb.amazonaws.com/mlops/
MLFLOW_TRACKING_PASSWORD=admin
MLFLOW_TRACKING_USERNAME=admin

# Add or updating model description

In [15]:
client = MlflowClient()

desc =""" Model trained on  two datasets are related to red and white variants of the Portuguese 
        'Vinho Verde' wine. For more details, consult the reference [Cortez et al., 2009]."""


client.update_model_version(
    name="ElasticnetWineModel",
    version=1,
    description=desc
)


<ModelVersion: creation_timestamp=1650278328899, current_stage='Production', description=(' Model trained on  two datasets are related to red and white variants of the '
 'Portuguese \n'
 "        'Vinho Verde' wine. For more details, consult the reference [Cortez "
 'et al., 2009].'), last_updated_timestamp=1650279129615, name='ElasticnetWineModel', run_id='6c70852082c643e080835b6405e014ed', run_link='', source='mlflow-artifacts:/1/6c70852082c643e080835b6405e014ed/artifacts/model', status='READY', status_message='', tags={}, user_id='', version='1'>

# Show registered Models

In [8]:
for rm in client.list_registered_models():
    pprint(dict(rm), indent=4)

{   'creation_timestamp': 1650278328791,
    'description': '',
    'last_updated_timestamp': 1650278328899,
    'latest_versions': [   <ModelVersion: creation_timestamp=1650278328899, current_stage='None', description='', last_updated_timestamp=1650278328899, name='ElasticnetWineModel', run_id='6c70852082c643e080835b6405e014ed', run_link='', source='mlflow-artifacts:/1/6c70852082c643e080835b6405e014ed/artifacts/model', status='READY', status_message='', tags={}, user_id='', version='1'>],
    'name': 'ElasticnetWineModel',
    'tags': {}}


In [9]:
dict(rm)

{'creation_timestamp': 1650278328791,
 'description': '',
 'last_updated_timestamp': 1650278328899,
 'latest_versions': [<ModelVersion: creation_timestamp=1650278328899, current_stage='None', description='', last_updated_timestamp=1650278328899, name='ElasticnetWineModel', run_id='6c70852082c643e080835b6405e014ed', run_link='', source='mlflow-artifacts:/1/6c70852082c643e080835b6405e014ed/artifacts/model', status='READY', status_message='', tags={}, user_id='', version='1'>],
 'name': 'ElasticnetWineModel',
 'tags': {}}

# Transition model to stage

In [10]:
client.transition_model_version_stage(
    name="ElasticnetWineModel",
    version=1,
    stage="Production"
)

<ModelVersion: creation_timestamp=1650278328899, current_stage='Production', description='', last_updated_timestamp=1650278613192, name='ElasticnetWineModel', run_id='6c70852082c643e080835b6405e014ed', run_link='', source='mlflow-artifacts:/1/6c70852082c643e080835b6405e014ed/artifacts/model', status='READY', status_message='', tags={}, user_id='', version='1'>

# Serving model
_to do: need to include mlflowProject to ensure dependencies are all present_

In [55]:
import mlflow
logged_model = 'runs:/c48c26e1ae474f3cae796c7a05454cf2/model'

# Load model as a PyFuncModel.
loaded_model = mlflow.pyfunc.load_model(logged_model)

# Predict on a Pandas DataFrame.
import pandas as pd
data = get_data()

loaded_model.predict(pd.DataFrame(data))

array([5.57688397, 5.50664777, 5.52550482, ..., 5.76010711, 5.6899349 ,
       5.69732596])

In [13]:
# Set environment variable for the tracking URL where the Model Registry resides
!export MLFLOW_TRACKING_URI=DEFAULT_ARTIFACTS_URI
# Serve the production model from the model registry
!mlflow models serve -m "models:/ElasticnetWineModel/Production"

2022/04/18 11:46:33 INFO mlflow.models.cli: Selected backend for flavor 'python_function'
Traceback (most recent call last):
  File "/home/akinwilson/.pyenv/versions/3.8.2/bin/mlflow", line 8, in <module>
    sys.exit(cli())
  File "/home/akinwilson/.local/lib/python3.8/site-packages/click/core.py", line 1128, in __call__
    return self.main(*args, **kwargs)
  File "/home/akinwilson/.local/lib/python3.8/site-packages/click/core.py", line 1053, in main
    rv = self.invoke(ctx)
  File "/home/akinwilson/.local/lib/python3.8/site-packages/click/core.py", line 1659, in invoke
    return _process_result(sub_ctx.command.invoke(sub_ctx))
  File "/home/akinwilson/.local/lib/python3.8/site-packages/click/core.py", line 1659, in invoke
    return _process_result(sub_ctx.command.invoke(sub_ctx))
  File "/home/akinwilson/.local/lib/python3.8/site-packages/click/core.py", line 1395, in invoke
    return ctx.invoke(self.callback, **ctx.params)
  File "/home/akinwilson/.local/lib/python3.8/site-pack