In [1]:
!pip install mlflow sklearn boxkite dataclasses boto3

Defaulting to user installation because normal site-packages is not writeable
Collecting mlflow
  Downloading mlflow-1.15.0-py3-none-any.whl (14.2 MB)
[K     |████████████████████████████████| 14.2 MB 4.1 MB/s eta 0:00:01
[?25hCollecting sklearn
  Downloading sklearn-0.0.tar.gz (1.1 kB)
Collecting boxkite
  Downloading boxkite-0.0.2-py3-none-any.whl (49 kB)
[K     |████████████████████████████████| 49 kB 15.2 MB/s eta 0:00:01
[?25hCollecting dataclasses
  Downloading dataclasses-0.8-py3-none-any.whl (19 kB)
Collecting boto3
  Downloading boto3-1.17.47-py2.py3-none-any.whl (131 kB)
[K     |████████████████████████████████| 131 kB 12.0 MB/s eta 0:00:01
Collecting alembic<=1.4.1
  Downloading alembic-1.4.1.tar.gz (1.1 MB)
[K     |████████████████████████████████| 1.1 MB 17.5 MB/s eta 0:00:01
[?25hCollecting Flask
  Downloading Flask-1.1.2-py2.py3-none-any.whl (94 kB)
[K     |████████████████████████████████| 94 kB 7.6 MB/s  eta 0:00:01
Collecting prometheus-flask-exporter
  Downlo

In [1]:
import pickle

from sklearn.datasets import load_diabetes
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from boxkite.monitoring.service import ModelMonitoringService

import mlflow
# Turn on MLflow's scikit-learn autologging before any estimator is fitted.
# NOTE(review): presumably this is what logs the fitted model that a later
# cell loads from the run's `artifacts/model` path -- confirm against the
# MLflow autolog documentation for the installed version.
mlflow.sklearn.autolog()

In [3]:
# Fixed seed so the train/test split -- and therefore the printed score and
# the X_test reused by later cells -- is identical on every run.
RANDOM_STATE = 42

with mlflow.start_run() as run:
    # Load the diabetes dataset and hold out a test split.
    bunch = load_diabetes()
    X_train, X_test, Y_train, Y_test = train_test_split(
        bunch.data, bunch.target, random_state=RANDOM_STATE
    )

    # Fit inside the active MLflow run so the training is associated with it.
    model = LinearRegression()
    model.fit(X_train, Y_train)

    print("Score: %.2f" % model.score(X_test, Y_test))

    # Keep a local pickled copy of the fitted estimator.
    with open("./model.pkl", "wb") as f:
        pickle.dump(model, f)

    # Pair each feature name with its column of training values, e.g.
    # features = [("age", [33, 23, 54, ...]), ("sex", [0, 1, 0]), ...]
    features = zip(bunch.feature_names, X_train.T)
    ModelMonitoringService.export_text(
        features=features, path="./histogram.prom",
    )
    # Attach the exported file to the current MLflow run as an artifact.
    mlflow.log_artifact("./histogram.prom")


Score: 0.54


In [6]:
import mlflow
import pandas as pd

# Address the model through the run captured by `mlflow.start_run() as run`
# instead of a copied S3 path: a hard-coded URI embeds one specific run ID and
# goes stale (or fails) as soon as the notebook is re-run.
# "model" is the artifact path the previous hard-coded URI pointed at
# (.../artifacts/model).
logged_model = f"runs:/{run.info.run_id}/model"

# Load model as a PyFuncModel.
loaded_model = mlflow.pyfunc.load_model(logged_model)

# Predict on a Pandas DataFrame.
loaded_model.predict(pd.DataFrame(X_test))

array([123.04994884, 227.07801219,  90.28349888, 136.99993405,
        64.42123992, 102.24358733, 280.7955021 , 197.65219032,
       224.75650891, 280.10278244,  77.70495443, 166.78218112,
       103.52713245, 167.09631954, 153.35756698, 240.63926662,
       136.04693198, 169.47984237, 124.5833244 , 279.29183373,
       112.57086981,  94.20578999, 239.19823543, 125.69567902,
       186.09269719, 237.64952599, 146.44545148, 162.28794204,
        68.53966597, 137.70198155, 201.14109399, 121.35056669,
       108.24311489, 221.89775819, 141.12743574,  34.47048126,
       219.56452278,  70.59666586, 224.97612627, 140.26183757,
        68.31699084, 236.37096734, 165.76641029,  75.07197914,
       212.04752004, 189.80484158, 116.79844654, 142.79529704,
       150.32015566, 117.64560673, 102.66243265, 195.11165152,
        75.06959993, 157.77660561, 141.0845715 , 190.15881287,
       156.08816333,  78.21346932, 187.04922109, 111.54161513,
        96.48270088,  88.09327992, 148.15255386, 159.31

In [12]:
# Show the type of the object wrapped by the loaded PyFuncModel.
# NOTE(review): `_model_impl` is an underscore-prefixed (private) attribute,
# so it may change between MLflow versions -- use for inspection only.
type(loaded_model._model_impl)

sklearn.linear_model._base.LinearRegression

## What to do next

Port [`serve_completed.py`](https://github.com/basisai/boxkite/blob/master/examples/grafana-prometheus/app/serve_completed.py)
to a container that can talk to MLflow.

Have the container fetch a model by ID, along with its histogram, and serve the model while exposing Prometheus metrics.

Have Prometheus and Grafana in the cluster serve an appropriate dashboard (set up automatically, either with Grafana dashboard provisioning or with the Grafana Terraform provider).