In [None]:
import pickle

from sklearn.datasets import load_diabetes
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from boxkite.monitoring.service import ModelMonitoringService

import mlflow
# Switch on MLflow's scikit-learn autologging before any estimator is fitted
# in the cells below.
mlflow.sklearn.autolog()

In [None]:
# Fixed seed so the train/test split -- and with it the printed score and the
# exported histogram -- is identical on every Restart & Run All.
RANDOM_SEED = 42

with mlflow.start_run() as run:

    # Load the diabetes dataset and hold out a test split.
    bunch = load_diabetes()
    X_train, X_test, Y_train, Y_test = train_test_split(
        bunch.data, bunch.target, random_state=RANDOM_SEED
    )
    model = LinearRegression()
    model.fit(X_train, Y_train)

    print("Score: %.2f" % model.score(X_test, Y_test))

    # Keep a local pickle of the fitted model next to the notebook.
    with open("./model.pkl", "wb") as f:
        pickle.dump(model, f)

    # Pair every feature name with its training column. The pairs are
    # materialised into a list because a bare zip() is a one-shot iterator
    # that is empty after its first traversal.
    features = list(zip(bunch.feature_names, X_train.T))
    # features = [("age", [33, 23, 54, ...]), ("sex", [0, 1, 0]), ...]
    ModelMonitoringService.export_text(
        features=features, path="./histogram.prom",
    )
    # Attach the exported file to the active MLflow run.
    mlflow.log_artifact("./histogram.prom")


In [None]:
# Display the ID of the MLflow run created by the `with mlflow.start_run()`
# cell; later cells use it to locate the model and to configure the deployment.
run.info.run_id

In [None]:
import mlflow
import pandas as pd

# Refer to the model through the run that produced it instead of a hand-built
# S3 path: a `runs:/<run_id>/<artifact_path>` URI is resolved by MLflow itself,
# so the cell no longer depends on the bucket name or on the experiment id
# being 0.
logged_model = f"runs:/{run.info.run_id}/model"

# Load model as a PyFuncModel.
loaded_model = mlflow.pyfunc.load_model(logged_model)

# Predict on a Pandas DataFrame built from the held-out split.
loaded_model.predict(pd.DataFrame(X_test))

In [None]:
# Show the type of the object wrapped by the loaded pyfunc model.
# NOTE(review): `_model_impl` is a private attribute (leading underscore);
# treat this as an inspection aid only, not a stable API.
type(loaded_model._model_impl)

## What to do next

Port https://github.com/basisai/boxkite/blob/master/examples/grafana-prometheus/app/serve_completed.py
into a container that can talk to MLflow.

Have it fetch a model by its run ID, together with its histogram, and serve the model while exposing Prometheus metrics.

Have Prometheus and Grafana in the cluster serve an appropriate dashboard (set up automatically, either through Grafana provisioning or through the Grafana Terraform provider).

In [None]:
# List the AWS/MLflow settings visible to the kernel, masking the value of any
# variable whose name contains KEY so credentials are not stored in the
# notebook's saved output.
!env | grep "AWS\|MLFLOW" | sed 's/^\([^=]*KEY[^=]*\)=.*/\1=<redacted>/'

In [None]:
# TODO: consider using bodywork to simplify the k8s stuff for data scientists.
import json
import os

# Image tag of the serving container to deploy.
version = "e7a70df"


def _yaml_str(value):
    """Return ``value`` rendered as a double-quoted YAML string scalar.

    A JSON string literal is also a valid YAML double-quoted scalar, so the
    value stays a string (never re-typed as a number or boolean) and
    characters such as ``#`` or ``: `` cannot break the manifest.
    """
    return json.dumps(str(value))


# Resolve every injected value up front; a missing environment variable
# raises KeyError here, before any manifest text is produced.
# NOTE(security): the AWS credentials end up in plain text in the rendered
# manifest. Consider a Kubernetes Secret referenced via `secretKeyRef`.
container_env = {
    "MLFLOW_RUN_ID": _yaml_str(run.info.run_id),
    "MLFLOW_TRACKING_URI": _yaml_str(os.environ["MLFLOW_TRACKING_URI"]),
    "MLFLOW_S3_ENDPOINT_URL": _yaml_str(os.environ["MLFLOW_S3_ENDPOINT_URL"]),
    "AWS_ACCESS_KEY_ID": _yaml_str(os.environ["AWS_ACCESS_KEY_ID"]),
    "AWS_SECRET_ACCESS_KEY": _yaml_str(os.environ["AWS_SECRET_ACCESS_KEY"]),
}

# Deployment: three replicas of the model server, configured through the
# environment to load the model logged by the MLflow run above.
deployment = f"""
apiVersion: apps/v1
kind: Deployment
metadata:
  name: ml-deployment
  labels:
    app: ml-server
spec:
  replicas: 3
  selector:
    matchLabels:
      app: ml-server
  template:
    metadata:
      labels:
        app: ml-server
      annotations:
        prometheus.io/scrape: "true"
    spec:
      containers:
      - name: ml-server
        image: quay.io/boxkite/boxkite-app:{version}
        ports:
        - containerPort: 5000
        #command: ["tail", "-f", "/dev/null"]
        env:
        - name: MLFLOW_RUN_ID
          value: {container_env['MLFLOW_RUN_ID']}
        - name: MLFLOW_TRACKING_URI
          value: {container_env['MLFLOW_TRACKING_URI']}
        - name: MLFLOW_S3_ENDPOINT_URL
          value: {container_env['MLFLOW_S3_ENDPOINT_URL']}
        - name: AWS_ACCESS_KEY_ID
          value: {container_env['AWS_ACCESS_KEY_ID']}
        - name: AWS_SECRET_ACCESS_KEY
          value: {container_env['AWS_SECRET_ACCESS_KEY']}
"""

# Service: exposes the pods labelled `app: ml-server` on port 80, forwarding
# to the container port 5000 declared in the Deployment.
service = """
apiVersion: v1
kind: Service
metadata:
  name: ml-server
spec:
  selector:
    app: ml-server
  ports:
    - protocol: TCP
      port: 80
      targetPort: 5000
"""

In [None]:
# Write each manifest through a context manager so the file is flushed and
# closed deterministically instead of relying on garbage collection.
for manifest_path, manifest_text in (
    ("deployment.yaml", deployment),
    ("service.yaml", service),
):
    with open(manifest_path, "w") as manifest_file:
        manifest_file.write(manifest_text)

In [None]:
# Print the rendered Deployment manifest for inspection.
# NOTE: the manifest embeds AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY as
# plain values, so clear this cell's output before sharing the notebook.
!cat deployment.yaml

In [None]:
# Apply the two manifests written to disk by the earlier cell: first the
# Deployment, then the Service.
!kubectl apply -f deployment.yaml
!kubectl apply -f service.yaml

In [None]:
# List the Services and the pods in the current namespace to check that the
# objects applied above exist.
!kubectl get services
!kubectl get po

In [None]:
# Fetch logs through the Deployment rather than a generated pod name: pod
# names get a new suffix whenever the pods are recreated, so a hard-coded
# name stops working after the next rollout.
!kubectl logs deployment/ml-deployment

In [None]:
# Send a JSON array of ten numeric values in a request body to the host
# `ml-server`, the name of the Service defined above (port 80 -> container
# port 5000).
# NOTE(review): the bare hostname presumably resolves only from inside the
# cluster namespace -- confirm where this notebook is executed.
!curl ml-server -H "Content-Type: application/json" \
-d "[0.03, 0.05, -0.002, -0.01, 0.04, 0.01, 0.08, -0.04, 0.005, -0.1]"

In [None]:
# Run the load.py script; the path is relative to the notebook's working
# directory.
# NOTE(review): presumably this sends traffic to the deployed service --
# confirm against the script itself.
!python docs/examples/kubeflow-mlflow/load.py