In [None]:
# documentation
# https://www.mlflow.org/docs/latest/index.html

In [4]:
import mlflow
import requests
import pandas as pd
import json
import os
import numpy as np
import waitress
from flask import Flask, request
from tqdm import tqdm

from sklearn.linear_model import ElasticNet
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.model_selection import train_test_split

# 1. Local Tracking

In [None]:
data_url = r'http://archive.ics.uci.edu/ml/machine-learning-databases/wine-quality/winequality-red.csv'

# load data
df = pd.read_csv(data_url, sep=';')
df_train, df_test = train_test_split(df, test_size=0.2, random_state=123)

# split
label   = 'quality'
X_train = df_train.drop(columns=label)
y_train = df_train[label]
X_test  = df_test.drop(columns=label)
y_test  = df_test[label]

In [None]:
# evaluation function

def eval_metrics(actual, pred):
    
    rmse = np.sqrt(mean_squared_error(actual, pred)) 
    mae = mean_absolute_error(actual, pred)
    r2 = r2_score(actual, pred)
    
    return rmse, mae, r2

In [None]:
# set up experiment

mlruns_path = os.path.join(os.path.abspath('.'), 'mlruns')

tracking_uri = f"file:{mlruns_path}"
# Need to have mlruns as the folder name!

experiment_name = 'Wine Quality - Elastic Net'

mlflow.set_tracking_uri(tracking_uri)
mlflow.set_experiment(experiment_name)

In [None]:
# set parameters

n_runs = 10
max_alpha = 10

In [None]:
# run experiments

for run in range(n_runs):
    with mlflow.start_run():
        
        # random hyperparameters
        alpha = np.random.uniform(0, max_alpha)
        l1_ratio = np.random.rand()

        # model fitting
        model = ElasticNet(alpha=alpha, l1_ratio=l1_ratio)
        model.fit(X_train, y_train)

        # model evaluation
        rmse_train, mae_train, r2_train = eval_metrics(y_train, model.predict(X_train))
        rmse_test, mae_test, r2_test = eval_metrics(y_test, model.predict(X_test))

        # Reporting
        mlflow.log_param('alpha', alpha)
        mlflow.log_param('l1_ratio', l1_ratio)
        
        mlflow.log_metric('rmse_train', rmse_train)
        mlflow.log_metric('mae_train', mae_train)
        mlflow.log_metric('r2_train', r2_train)
        
        mlflow.log_metric('rmse_test', rmse_test)
        mlflow.log_metric('mae_test', mae_test)
        mlflow.log_metric('r2_test', r2_test)
        
        mlflow.sklearn.log_model(model, 'model')

In [None]:
# now go to the ui

## in cmd: path = ..\..\notebooks (parent directory of mlruns)
## in cmd: mlflow ui
## in browser: go to localhost:5000 or http://127.0.0.1:5000/

# 2. Remote Tracking

Not currently working

In [None]:
# set up experiment

tracking_uri = r'http://52.90.101.28:5000/'
# this one was made on AWS - doesnt exist now

experiment_name = 'Wine Quality - Elastic Net'

mlflow.set_tracking_uri(tracking_uri)
mlflow.set_experiment(experiment_name)

In [None]:
# set parameters

n_runs = 10
max_alpha = 10

In [None]:
# run experiments

for run in range(n_runs):
    with mlflow.start_run():
        
        # random hyperparameters
        alpha = np.random.uniform(0, max_alpha)
        l1_ratio = np.random.rand()

        # model fitting
        model = ElasticNet(alpha=alpha, l1_ratio=l1_ratio)
        model.fit(X_train, y_train)

        # model evaluation
        rmse_train, mae_train, r2_train = eval_metrics(y_train, model.predict(X_train))
        rmse_test, mae_test, r2_test = eval_metrics(y_test, model.predict(X_test))

        # Reporting
        mlflow.log_param('alpha', alpha)
        mlflow.log_param('l1_ratio', l1_ratio)
        
        mlflow.log_metric('rmse_train', rmse_train)
        mlflow.log_metric('mae_train', mae_train)
        mlflow.log_metric('r2_train', r2_train)
        
        mlflow.log_metric('rmse_test', rmse_test)
        mlflow.log_metric('mae_test', mae_test)
        mlflow.log_metric('r2_test', r2_test)
        
        mlflow.sklearn.log_model(model, 'model')

# 3. Local Server Artifact Download

In [5]:
# set up experiment

mlruns_path = os.path.join(os.path.abspath('.'), 'mlruns')

tracking_uri = f"file:{mlruns_path}"
# Need to have mlruns as the folder name!

experiment_name = 'Wine Quality - Elastic Net'

mlflow.set_tracking_uri(tracking_uri)
mlflow.set_experiment(experiment_name)

<Experiment: artifact_location='file:C:\\Users\\Dell5520\\PycharmProjects\\personal\\infrastructure\\notebooks\\mlruns/0', experiment_id='0', lifecycle_stage='active', name='Wine Quality - Elastic Net', tags={}>

In [6]:
!mkdir mlflow_artifacts

destination_path = os.path.join(os.path.abspath('.'), 'mlflow_artifacts')

A subdirectory or file mlflow_artifacts already exists.


In [7]:
# define our metric
metric = r'rmse_test'

# connect to our client
client = mlflow.tracking.MlflowClient(tracking_uri=tracking_uri)

# get experiment id
experiment_id = client.get_experiment_by_name(experiment_name).experiment_id

# model runs
runs = client.search_runs(experiment_id, order_by=[f'metrics.{metric}'])

# get best run ID
best_run_id = runs[0].info.run_id

# download artifacts
client.download_artifacts(best_run_id, 'model', destination_path)

'C:\\Users\\Dell5520\\PycharmProjects\\personal\\infrastructure\\notebooks\\mlflow_artifacts\\model'

In [13]:
# make a function

def download_top_models(
    n, metric, tracking_uri, experiment_name, destination_path, model_name
):
    
    # connect to our client
    client = mlflow.tracking.MlflowClient(tracking_uri=tracking_uri)

    # get experiment id
    experiment_id = client.get_experiment_by_name(
        experiment_name
    ).experiment_id

    # model runs
    runs = client.search_runs(experiment_id, order_by=[f'metrics.{metric}'])
    
    run_ids = [run.info.run_id for run in runs[0:n]]
    
    for ind, run in tqdm(enumerate(run_ids)):
        
        model_path = os.path.join(destination_path, str(ind + 1))
        
        if not os.path.exists(model_path):
            os.mkdir(model_path)
        
        client.download_artifacts(
            run, f'{model_name}', model_path
        )
        
n = 10
metric = r'rmse_test'
model_name = 'model' # has to match what you log model as 

download_top_models(
    n, metric, tracking_uri, experiment_name, destination_path, model_name
)

10it [00:00, 33.85it/s]


In [None]:
# check out runs as a df

df_runs = pd.DataFrame([{**i.data.metrics, **i.data.params, **dict(i.info)} for i in runs])
df_runs

In [None]:
# load model

model_uri = os.path.join(destination_path, 'model')
model = mlflow.sklearn.load_model(model_uri)
print(model)

# Predicting to make sure it works
model.predict(np.zeros((1, 11)))

# 4. MLFlow & Flask

- Before running these cells, run the cells in the MLFlow Flask App notebook

In [3]:
# try get request - status code 200 is good
requests.get('http://127.0.0.1:1337/echo')

<Response [200]>

In [4]:
# Try a post request
r = requests.post('http://127.0.0.1:1337/echo', data='hello')
print(r.status_code)

200


In [6]:
# Getting data to make predictions on
data_url = r'http://archive.ics.uci.edu/ml/machine-learning-databases/wine-quality/winequality-red.csv'
data = json.dumps(pd.read_csv(data_url, sep=';').drop(columns='quality').sample(10).to_dict())

In [7]:
# Doing a post request to get predictions
r = requests.post('http://127.0.0.1:1337/invocations', data=data)
print('Content: ', r.content)

Content:  b'[5.650008902645847, 5.218801183986223, 5.444273831423893, 5.249585737472442, 5.526233529506899, 5.801264277374394, 5.546808439626528, 6.39625542879018, 5.693060101690762, 5.665863509768949]'
