This example notebook shows how to perform some of the steps from the Databricks MLflow Model Registry example, available at: https://docs.databricks.com/_static/notebooks/mlflow/mlflow-model-registry-example.html

The "Faction CCV Settings" cell is Copyright 2021 Faction Group, LLC, under the terms of the MIT license (https://opensource.org/licenses/MIT).

All other code is Copyright the original author(s) and is reproduced here for context.

In [0]:

import pandas as pd
# Load the wind farm dataset from the demo repository; the first CSV column becomes the index.
# NOTE(review): parse_dates is not passed, so the index is presumably left as plain strings and
# the date-string slices used later in this notebook would act on string labels — confirm.
wind_farm_data = pd.read_csv("https://github.com/dbczumar/model-registry-demo-notebook/raw/master/dataset/windfarm_data.csv", index_col=0)
 
def get_training_data():
  """Return the (features, target) pair used for training.

  Takes the rows of the notebook-level `wind_farm_data` in the label slice
  "2014-01-01":"2018-01-01". The "power" column is the target; every other
  column is a feature.
  """
  train_frame = pd.DataFrame(wind_farm_data["2014-01-01":"2018-01-01"])
  return train_frame.drop(columns="power"), train_frame["power"]
 
def get_validation_data():
  """Return the (features, target) pair used for validation.

  Takes the rows of the notebook-level `wind_farm_data` in the label slice
  "2018-01-01":"2019-01-01". The "power" column is the target; every other
  column is a feature.
  """
  holdout_frame = pd.DataFrame(wind_farm_data["2018-01-01":"2019-01-01"])
  return holdout_frame.drop(columns="power"), holdout_frame["power"]
 
def get_weather_and_forecast():
  """Return weather features around today plus the recent power output.

  The window runs from 5 days before today to 5 days after today. Power output
  is taken from the window start up to today. If the full window selects fewer
  than 10 rows, the last 10 rows of `wind_farm_data` are used for the weather
  features and the first 5 of those rows for the power output.

  Returns:
    A tuple of (window rows without the "power" column, "power" column of the past rows).
  """
  def _as_label(stamp):
    # Render a timestamp as the "YYYY-MM-DD" label used to slice the index.
    return stamp.date().strftime("%Y-%m-%d")

  today = pd.Timestamp('today').normalize()
  half_window = pd.Timedelta(days=5)
  start_label = _as_label(today - half_window)
  today_label = _as_label(today)
  end_label = _as_label(today + half_window)

  frame = pd.DataFrame(wind_farm_data)
  recent_power = frame[start_label:today_label]
  window = frame[start_label:end_label]
  if len(window) < 10:
    # Not enough rows around today's date: fall back to the tail of the dataset.
    recent_power = frame.iloc[-10:-5]
    window = frame.iloc[-10:]

  return window.drop(columns="power"), recent_power["power"]

In [0]:
# Preview the rows in the label slice "2019-01-01":"2019-01-14".
wind_farm_data["2019-01-01":"2019-01-14"]

In [0]:
import tensorflow as tf
from tensorflow.keras.layers import Dense
from tensorflow.keras.models import Sequential

In [0]:
import mlflow
 
# See https://www.mlflow.org/docs/latest/tracking.html for details on MLflow tracking.
# This notebook uses a remote MLflow server whose PostgreSQL backend store and artifact root are both located on the CCV. The server was started with:
#    mlflow server --host 0.0.0.0 --backend-store-uri postgresql://mlflow_user:mlflow@localhost/mlflow_db --default-artifact-root file:///ccvs/multicloud/premier/artifacts
# See https://towardsdatascience.com/setup-mlflow-in-production-d72aecde7fef for an example of MLflow + PostgreSQL, but note that here both the backend store and the default artifact root are CCV-based.
# PostgreSQL was initialized with:
#    initdb -D /ccvs/multicloud/premier/tracking/
# NOTE(review): the server command above embeds database credentials — consider redacting before sharing this notebook.
tracking_uri = "http://10.220.200.60:5000"
# Point the MLflow client at the remote tracking server defined above.
mlflow.set_tracking_uri(tracking_uri)



In [0]:
def train_keras_model(X, y):
  """Build and fit a small feed-forward regression network.

  The network has one 100-unit ReLU hidden layer and a single linear output
  unit. It is compiled with mean-squared-error loss and the Adam optimizer.

  Args:
    X: Training features; the size of its last axis sets the input width.
    y: Training targets passed to `model.fit`.

  Returns:
    The fitted Keras `Sequential` model.
  """
  # Use the function's own arguments, not the notebook globals X_train / y_train,
  # so the function trains on whatever data the caller passes in and does not
  # depend on cell execution order.
  model = Sequential()
  model.add(Dense(100, input_shape=(X.shape[-1],), activation="relu", name="hidden_layer"))
  model.add(Dense(1))
  model.compile(loss="mse", optimizer="adam")

  # 20% of the supplied data is held out by Keras for per-epoch validation.
  model.fit(X, y, epochs=100, batch_size=64, validation_split=.2)
  return model

In [0]:
import mlflow
import mlflow.keras
import mlflow.tensorflow
 
# Fetch the training split and select the MLflow experiment that will hold the run.
X_train, y_train = get_training_data()
mlflow.set_experiment("FCTNDEMO")

with mlflow.start_run() as training_run:
  # Show which tracking server this run is recorded on.
  print(mlflow.get_tracking_uri())

  # `autolog()` automatically captures the model's parameters, metrics,
  # artifacts, and source code during training.
  mlflow.tensorflow.autolog()
  train_keras_model(X_train, y_train)

  # Keep the run id so the logged model can be referenced after the run ends.
  run_id = training_run.info.run_id

In [0]:
# Name under which the trained model is registered (passed to `mlflow.register_model` later in this notebook).
model_name = "power-forecasting-model" # Replace this with the name of your registered model, if necessary.

In [0]:
# The default path where the MLflow autologging function stores the model
artifact_path = "model"
model_uri = "runs:/{run_id}/{artifact_path}".format(
  run_id=run_id,
  artifact_path=artifact_path,
)

# Look up the registry and tracking endpoints currently in effect, then report both.
mr_uri = mlflow.get_registry_uri()
tracking_uri = mlflow.get_tracking_uri()
print("Current model registry uri: {}".format(mr_uri))
print("Current tracking uri: {}".format(tracking_uri))

# Register the model logged by the run under `model_name`.
model_details = mlflow.register_model(model_uri=model_uri, name=model_name)

In [0]:
import time
from mlflow.tracking.client import MlflowClient
from mlflow.entities.model_registry.model_version_status import ModelVersionStatus
 
def wait_until_ready(model_name, model_version, max_attempts=10, poll_interval=1):
  """Poll the Model Registry until a model version reports READY.

  Args:
    model_name: Name of the registered model.
    model_version: Version of the registered model to check.
    max_attempts: Maximum number of status checks to perform (default 10).
    poll_interval: Seconds to sleep between consecutive checks (default 1).

  Returns:
    True if the version was observed in the READY state. False if registration
    failed or the version was still not ready after `max_attempts` checks.
  """
  client = MlflowClient()
  for attempt in range(max_attempts):
    model_version_details = client.get_model_version(
      name=model_name,
      version=model_version,
    )
    status = ModelVersionStatus.from_string(model_version_details.status)
    print("Model status: %s" % ModelVersionStatus.to_string(status))
    if status == ModelVersionStatus.READY:
      return True
    if status == ModelVersionStatus.FAILED_REGISTRATION:
      # A failed registration is terminal, so further polling cannot succeed.
      return False
    # Only wait when another check will follow; no sleep after the final attempt.
    if attempt < max_attempts - 1:
      time.sleep(poll_interval)
  # Attempts exhausted without seeing READY; report that to the caller.
  return False
  
# Poll the registry for the status of the model version registered earlier in this notebook.
wait_until_ready(model_details.name, model_details.version)