In [1]:
# Enable IPython's autoreload extension so that edits to imported modules
# are picked up live, without restarting the kernel.
%load_ext autoreload
%autoreload 2

# MLflow Regression Recipe Notebook

This notebook runs the MLflow Regression Recipe on Databricks and inspects its results. For more information about the MLflow Regression Recipe, including usage examples, see the [Regression Recipe overview documentation](https://mlflow.org/docs/latest/recipes.html#regression-recipe) and the [Regression Recipe API documentation](https://mlflow.org/docs/latest/python_api/mlflow.recipes.html#module-mlflow.recipes.regression.v1.recipe).

In [2]:
from mlflow.recipes import Recipe

# Build the recipe object with the "local" profile. Every later cell drives
# the recipe through this notebook-global handle `r`.
# NOTE(review): no recipe root is passed here, so it is presumably resolved
# from the working directory -- confirm before running from another location.
r = Recipe(profile="local")

2023/08/08 18:52:51 INFO mlflow.recipes.recipe: Creating MLflow Recipe '05-MLflow-Recipes' with profile: 'local'


In [3]:
# Inspect the recipe before running any step.
# NOTE(review): presumably renders an overview of the recipe's steps; the
# saved notebook has no text output for this cell -- confirm what it shows.
r.inspect()

In [4]:
# Run only the "ingest" step. In the recorded run the step output lists the
# ingested column names/types and a small sample of rows.
r.run("ingest")

2023/08/08 18:54:07 INFO mlflow.recipes.step: Running step ingest...


name,type
tpep_pickup_datetime,datetime
tpep_dropoff_datetime,datetime
trip_distance,number
fare_amount,number
pickup_zip,integer
dropoff_zip,integer

tpep_pickup_datetime,tpep_dropoff_datetime,trip_distance,fare_amount,pickup_zip,dropoff_zip
2016-02-13 21:47:53,2016-02-13 21:57:15,1.4,8.0,10103,10110
2016-02-13 18:29:09,2016-02-13 18:37:23,1.31,7.5,10023,10023
2016-02-06 19:40:58,2016-02-06 19:52:32,1.8,9.5,10001,10018
2016-02-12 19:06:43,2016-02-12 19:20:54,2.3,11.5,10044,10111
2016-02-23 10:27:56,2016-02-23 10:58:33,2.6,18.5,10199,10022


In [5]:
# Run the "split" step. Per the recorded log, the already-completed "ingest"
# step is skipped when nothing has changed, so only "split" executes.
r.run("split")

2023/08/08 18:54:45 INFO mlflow.recipes.utils.execution: ingest: No changes. Skipping.


Run MLFlow Recipe step: split
2023/08/08 18:54:47 INFO mlflow.recipes.step: Running step split...


In [6]:
# Fetch the "training_data" artifact from the recipe and show summary
# statistics for it as the cell's output.
# NOTE(review): the recorded summary shows a negative minimum fare_amount and
# a zero minimum trip_distance -- worth confirming whether such rows should be
# filtered out before training.
training_data = r.get_artifact("training_data")
training_data.describe()

Unnamed: 0,tpep_pickup_datetime,tpep_dropoff_datetime,trip_distance,fare_amount,pickup_zip,dropoff_zip
count,7519,7519,7519.0,7519.0,7519.0,7519.0
mean,2016-02-02 05:45:11.307753728,2016-02-02 06:00:15.275834368,2.881774,12.437332,10137.503657,10173.910094
min,2016-01-01 00:11:29,2016-01-01 00:16:23,0.0,-8.0,7002.0,7002.0
25%,2016-01-17 14:04:02,2016-01-17 14:25:00,1.0,6.5,10012.0,10013.0
50%,2016-02-04 06:08:41,2016-02-04 06:18:34,1.7,9.0,10022.0,10023.0
75%,2016-02-17 14:36:39,2016-02-17 14:52:26.500000,3.1,14.0,10110.0,10119.0
max,2016-02-29 23:51:06,2016-02-29 23:59:38,30.6,275.0,11436.0,11691.0
std,,,3.497474,10.877017,339.482642,411.316685


In [7]:
# Run the "transform" step; the recorded log shows "ingest" and "split" being
# skipped as unchanged. The recorded output lists the input column types and
# the generated float64 feature columns (f_00, f_01, ...) alongside fare_amount.
r.run("transform")

2023/08/08 18:55:39 INFO mlflow.recipes.utils.execution: ingest, split: No changes. Skipping.


Run MLFlow Recipe step: transform
2023/08/08 18:55:40 INFO mlflow.recipes.step: Running step transform...


Name,Type
tpep_pickup_datetime,datetime64[ns]
tpep_dropoff_datetime,datetime64[ns]
trip_distance,float64
fare_amount,float64
pickup_zip,int32
dropoff_zip,int32

Name,Type
f_00,float64
f_01,float64
f_02,float64
f_03,float64
f_04,float64
f_05,float64
f_06,float64
f_07,float64
f_08,float64
f_09,float64

f_00,f_01,f_02,f_03,f_04,f_05,f_06,f_07,f_08,f_09,f_10,f_11,f_12,f_13,f_14,f_15,f_16,f_17,f_18,f_19,f_20,f_21,f_22,f_23,f_24,f_25,f_26,f_27,f_28,f_29,f_30,f_31,f_32,fare_amount
0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,-0.423698,-0.110885,8.0
0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,-0.166352,-0.017175,11.5
0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,-0.08057,0.302542,18.5
0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,2.078274,0.03049,28.5
0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.111009,0.081723,15.0


In [8]:
# Run the "train" step; earlier unchanged steps are skipped per the recorded
# log. The recorded output reports training/validation metrics and the rows
# with the largest absolute prediction error.
# NOTE(review): the recorded training mean_absolute_percentage_error is
# extremely large; this is likely caused by zero-valued fare_amount targets
# (visible among the worst-error rows) -- confirm and consider cleaning them.
r.run("train")

2023/08/08 18:56:22 INFO mlflow.recipes.utils.execution: ingest, split, transform: No changes. Skipping.


Run MLFlow Recipe step: train
2023/08/08 18:56:23 INFO mlflow.recipes.step: Running step train...
2023/08/08 18:56:33 INFO mlflow.models.evaluation.base: Evaluating the model with the default evaluator.
2023/08/08 18:56:33 INFO mlflow.models.evaluation.base: Evaluating the model with the default evaluator.


Metric,training,validation
root_mean_squared_error,4.97987,3.94845
example_count,7519.0,1251.0
max_error,254.699,52.8758
mean_absolute_error,1.65136,1.76641
mean_absolute_percentage_error,92050800000000.0,0.157113
mean_on_target,12.4373,12.6875
mean_squared_error,24.7991,15.5903
r2_score,0.79036,0.861796
score,0.79036,0.861796
sum_on_target,93516.3,15872.0

Name,Type
tpep_pickup_datetime,datetime
tpep_dropoff_datetime,datetime
trip_distance,double
pickup_zip,integer
dropoff_zip,integer

Name,Type
-,"Tensor('float64', (-1,))"

absolute_error,prediction,fare_amount,tpep_pickup_datetime,tpep_dropoff_datetime,trip_distance,pickup_zip,dropoff_zip
254.698578,5.301422,260.0,2016-02-29 12:16:16,2016-02-29 12:16:53,0.0,8876,8876
211.263315,63.736685,275.0,2016-02-12 20:55:19,2016-02-12 21:52:38,20.85,10013,7008
100.402362,4.597638,105.0,2016-01-16 18:09:15,2016-01-16 18:09:23,0.0,7310,7310
77.757096,77.757096,0.0,2016-01-07 04:07:58,2016-01-07 04:43:08,26.3,10018,10606
72.916281,72.916281,0.0,2016-02-21 03:31:58,2016-02-21 03:32:28,25.0,10502,10502
51.867789,3.132211,55.0,2016-02-28 04:50:41,2016-02-28 04:52:32,0.18,10115,10027
48.21471,3.78529,52.0,2016-01-24 20:57:37,2016-01-24 20:57:52,0.0,10162,10162
47.368261,4.631739,52.0,2016-02-15 14:56:30,2016-02-15 15:52:31,0.0,11422,10171
46.386955,5.613045,52.0,2016-01-07 12:53:17,2016-01-07 12:53:53,0.0,11422,11422
45.048141,4.951859,50.0,2016-01-19 08:36:28,2016-01-19 08:37:22,0.0,10271,10271

Unnamed: 0,Latest
Model Rank,> 0
root_mean_squared_error,3.94845
max_error,52.8758
mean_absolute_error,1.76641
mean_absolute_percentage_error,0.157113
mean_squared_error,15.5903
Run Time,2023-08-08 18:56:24
Run ID,efffdef4899f4c51a34786bfe344c830


In [9]:
# Fetch the "model" artifact produced by the recipe and print its textual
# description (in the recorded run: a loaded pyfunc model with its run_id).
trained_model = r.get_artifact("model")
print(trained_model)

mlflow.pyfunc.loaded_model:
  artifact_path: train/model
  flavor: mlflow.pyfunc.model
  run_id: efffdef4899f4c51a34786bfe344c830



In [10]:
# Run the "evaluate" step; earlier unchanged steps are skipped per the
# recorded log. The recorded output compares validation and test metrics and
# shows root_mean_squared_error being validated against a threshold of 10.
r.run("evaluate")

2023/08/08 18:58:13 INFO mlflow.recipes.utils.execution: ingest, split, transform, train: No changes. Skipping.


Run MLFlow Recipe step: evaluate
2023/08/08 18:58:14 INFO mlflow.recipes.step: Running step evaluate...
2023/08/08 18:58:14 INFO mlflow.models.evaluation.base: Evaluating the model with the default evaluator.
2023/08/08 18:58:15 INFO mlflow.models.evaluation.base: Evaluating the model with the default evaluator.


Metric,validation,test
root_mean_squared_error,3.94845,2.385905
example_count,1251.0,1230.0
max_error,52.8758,20.480667
mean_absolute_error,1.76641,1.572496
mean_absolute_percentage_error,0.157113,0.499849
mean_on_target,12.6875,12.230496
mean_squared_error,15.5903,5.69254
r2_score,0.861796,0.941679
score,0.861796,0.941679
sum_on_target,15872.0,15043.51

metric,greater_is_better,value,threshold,validated
root_mean_squared_error,False,2.3859,10,✅


In [11]:
# Run the "register" step. In the recorded run the registered model
# 'Regression-recipe' already existed, so a new version of it was created
# rather than a new registered model.
r.run("register")

2023/08/08 18:58:29 INFO mlflow.recipes.utils.execution: ingest, split, transform, train, evaluate: No changes. Skipping.


Run MLFlow Recipe step: register
2023/08/08 18:58:30 INFO mlflow.recipes.step: Running step register...
Registered model 'Regression-recipe' already exists. Creating a new version of this model...
2023/08/08 18:58:30 INFO mlflow.tracking._model_registry.client: Waiting up to 300 seconds for model version to finish creation. Model name: Regression-recipe, version 3
Created version '3' of model 'Regression-recipe'.
