In [0]:
import mlflow
import mlflow.sklearn
import pandas as pd
import matplotlib.pyplot as plt

from numpy import savetxt
from sklearn.datasets import load_diabetes
from sklearn.model_selection import train_test_split

from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error

In [0]:
# Load the scikit-learn diabetes dataset and pull out the feature matrix
# and the regression target.
db = load_diabetes()
X, y = db.data, db.target

# Hold out 20% of the samples for evaluation; the fixed seed makes the
# split repeatable across kernel restarts.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [0]:
# Report how many samples ended up in each part of the train/test split.
# Labels are kept verbatim so the printed output is unchanged.
for label, split in (
    ("Number of samples in X_train are:", X_train),
    ("Number of samples in X_test are:", X_test),
    ("Number of samples in y_train are:", y_train),
    ("Number of samples in y_test are:", y_test),
):
    print(label, len(split))

In [0]:
# Show which MLflow release this notebook is running against
# (package-level alias of mlflow.version.VERSION).
print(mlflow.__version__)

In [0]:
# Baseline model: a random forest fitted outside of any explicit MLflow run.
n_estimators = 100
max_depth = 5
max_features = 3

# Random forests are stochastic (bootstrap sampling and random feature
# subsets). Seeding the estimator makes the fit, and therefore the
# predictions below, identical on every Restart & Run All -- the same seed
# the train/test split already uses.
rf = RandomForestRegressor(
    n_estimators=n_estimators,
    max_depth=max_depth,
    max_features=max_features,
    random_state=42,
)
rf.fit(X_train, y_train)

# Predictions on the held-out test set.
predictions = rf.predict(X_test)


In [0]:
# Name (workspace path) of the MLflow experiment the runs below are recorded under.
# NOTE(review): this path is tied to one user's workspace folder -- confirm
# whether it should come from configuration instead.
experiment_name = "/Users/d.t.georgian.pirvu@axpo.com/Databricks ML/Diabetes MLFlow logging API 2"

# Make it the active experiment for this notebook session.
mlflow.set_experiment(experiment_name=experiment_name)

In [0]:
# Turn on scikit-learn autologging before the run is opened; the explicit
# log_* calls inside the run are made in addition to it.
mlflow.sklearn.autolog()

with mlflow.start_run() as run:
    # Hyperparameters for this tracked run.
    n_estimators = 100
    max_depth = 6
    max_features = 3

    # Seed the forest so the logged metric, model and artifacts can be
    # reproduced by re-running the notebook; an unseeded forest yields a
    # different model (and a different MSE) on every execution.
    rf = RandomForestRegressor(
        n_estimators=n_estimators,
        max_depth=max_depth,
        max_features=max_features,
        random_state=42,
    )
    rf.fit(X_train, y_train)

    predictions = rf.predict(X_test)

    # Explicitly record the hyperparameters under their own "*_log" keys.
    mlflow.log_param("num_trees_log", n_estimators)
    mlflow.log_param("max_depth_log", max_depth)
    mlflow.log_param("max_features_log", max_features)

    # Test-set mean squared error, recorded as a run metric.
    mse = mean_squared_error(y_test, predictions)
    mlflow.log_metric("mse_log", mse)

    # Store the fitted estimator under a fixed artifact path.
    mlflow.sklearn.log_model(rf, "random-forest-model")

    # Save the table of predicted values and log it as an artifact.
    savetxt("predictions.csv", predictions, delimiter=",")
    mlflow.log_artifact("predictions.csv")

    # Per-observation differences wrapped in a DataFrame for plotting.
    # NOTE(review): this is predicted - observed; the more common residual
    # convention is observed - predicted -- confirm the intended sign.
    df = pd.DataFrame(data=predictions - y_test)

    # Draw on an explicit figure/axes pair rather than the implicit pyplot
    # state, so the plot never lands on whatever figure happens to be current.
    fig, ax = plt.subplots()
    ax.plot(df)
    ax.set_xlabel("Observations")
    ax.set_ylabel("Residuals")
    ax.set_title("Residuals Plot")

    # Save the plot and log it as an artifact.
    fig.savefig("residuals.png")
    mlflow.log_artifact("residuals.png")



In [0]:
# Look up the experiment's metadata with the read-only lookup API instead of
# calling set_experiment() a second time: set_experiment mutates the active
# experiment, and its return value is not an Experiment in every MLflow
# release, which would make the attribute accesses below fail on None.
experiment = mlflow.get_experiment_by_name(experiment_name)
if experiment is None:
    # Fail with a clear message rather than an AttributeError on None.
    raise RuntimeError("MLflow experiment not found: {}".format(experiment_name))

print("Experiment id: {}".format(experiment.experiment_id))
print("Artifact location: {}".format(experiment.artifact_location))
print("Tags owner: {}".format(experiment.tags.get('mlflow.ownerEmail')))
print("Lifecycle stage: {}".format(experiment.lifecycle_stage))