In [1]:
# Enable IPython's autoreload extension so edits to imported modules are
# picked up live, without restarting the kernel.
%load_ext autoreload
%autoreload 2

# MLflow Regression Recipe Notebook

This notebook runs the MLflow Regression Recipe and inspects its results. For more information about the MLflow Regression Recipe, including usage examples, see the [Regression Recipe overview documentation](https://mlflow.org/docs/latest/recipes.html#regression-recipe) and the [Regression Recipe API documentation](https://mlflow.org/docs/latest/python_api/mlflow.recipes.html#module-mlflow.recipes.regression.v1.recipe).

In [2]:
from mlflow.recipes import Recipe

# Create the recipe object using the "local" profile. Every later cell drives
# the recipe through this single notebook-global handle.
r = Recipe(profile="local")

2024/11/17 07:23:37 INFO mlflow.recipes.recipe: Creating MLflow Recipe 'regression_recipes' with profile: 'local'


In [3]:
# Presumably discards cached outputs of earlier step runs so the steps below
# execute from scratch -- TODO confirm against the Recipe.clean() API docs.
r.clean()

In [4]:
# Called without a step name; presumably displays an overview of the whole
# recipe rather than a single step -- TODO confirm (no output was captured).
r.inspect()

In [5]:
# Run the ingest step. Its output shows the ingested dataset's column schema
# and a preview of the first rows.
r.run("ingest")

2024/11/17 07:23:38 INFO mlflow.recipes.step: Running step ingest...

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]
Downloading artifacts: 100%|██████████| 1/1 [00:00<00:00, 1811.01it/s]
Downloading artifacts: 100%|██████████| 1/1 [00:00<00:00, 1158.65it/s]


name,type
tpep_pickup_datetime,datetime
tpep_dropoff_datetime,datetime
trip_distance,number
fare_amount,number
pickup_zip,integer
dropoff_zip,integer

tpep_pickup_datetime,tpep_dropoff_datetime,trip_distance,fare_amount,pickup_zip,dropoff_zip
2016-02-13 21:47:53,2016-02-13 21:57:15,1.4,8.0,10103,10110
2016-02-13 18:29:09,2016-02-13 18:37:23,1.31,7.5,10023,10023
2016-02-06 19:40:58,2016-02-06 19:52:32,1.8,9.5,10001,10018
2016-02-12 19:06:43,2016-02-12 19:20:54,2.3,11.5,10044,10111
2016-02-23 10:27:56,2016-02-23 10:58:33,2.6,18.5,10199,10022


In [6]:
# Run the split step. Upstream steps whose inputs are unchanged are reported
# as "No changes. Skipping." in the log instead of being re-executed.
r.run("split")

2024/11/17 07:23:38 INFO mlflow.recipes.utils.execution: ingest: No changes. Skipping.


Run MLflow Recipe step: split
2024/11/17 07:23:39 INFO mlflow.recipes.step: Running step split...
  return bound(*args, **kwds)


In [7]:
# Run the transform step. Its output shows the input column schema, the
# generated numeric feature columns (f_00, f_01, ...) and a preview of the
# transformed rows alongside the fare_amount target.
r.run("transform")

2024/11/17 07:23:41 INFO mlflow.recipes.utils.execution: ingest, split: No changes. Skipping.


Run MLflow Recipe step: transform
2024/11/17 07:23:42 INFO mlflow.recipes.step: Running step transform...


Name,Type
tpep_pickup_datetime,datetime64[us]
tpep_dropoff_datetime,datetime64[us]
trip_distance,float64
fare_amount,float64
pickup_zip,int32
dropoff_zip,int32

Name,Type
f_00,float64
f_01,float64
f_02,float64
f_03,float64
f_04,float64
f_05,float64
f_06,float64
f_07,float64
f_08,float64
f_09,float64

f_00,f_01,f_02,f_03,f_04,f_05,f_06,f_07,f_08,f_09,f_10,f_11,f_12,f_13,f_14,f_15,f_16,f_17,f_18,f_19,f_20,f_21,f_22,f_23,f_24,f_25,f_26,f_27,f_28,f_29,f_30,f_31,f_32,fare_amount
0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,-0.4211383004880428,-0.1112577507186591,8.0
0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,-0.3063002625513086,-0.0670297075321655,9.5
0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,-0.1627527151303908,-0.0144251410148967,11.5
0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,-0.0766241866778401,0.3159449391508809,18.5
0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,2.0909437793780183,0.0348288161700619,28.5


In [8]:
# Run the train step. Its output reports training vs. validation metrics, the
# model's input/output schema, the rows with the largest absolute error, and
# a summary of the latest run (including its run ID).
r.run("train")

2024/11/17 07:23:43 INFO mlflow.recipes.utils.execution: ingest, split, transform: No changes. Skipping.


Run MLflow Recipe step: train
2024/11/17 07:23:43 INFO mlflow.recipes.step: Running step train...

Downloading artifacts:   0%|          | 0/5 [00:00<?, ?it/s]
Downloading artifacts:  20%|██        | 1/5 [00:00<00:00, 8683.86it/s]
Downloading artifacts:  40%|████      | 2/5 [00:00<00:00, 11413.07it/s]
Downloading artifacts:  60%|██████    | 3/5 [00:00<00:00, 14413.42it/s]
Downloading artifacts:  80%|████████  | 4/5 [00:00<00:00, 17015.43it/s]
Downloading artifacts: 100%|██████████| 5/5 [00:00<00:00, 19293.03it/s]
Downloading artifacts: 100%|██████████| 5/5 [00:00<00:00, 13582.59it/s]

Downloading artifacts:   0%|          | 0/5 [00:00<?, ?it/s]
Downloading artifacts:  20%|██        | 1/5 [00:00<00:00, 6754.11it/s]
Downloading artifacts:  40%|████      | 2/5 [00:00<00:00, 6388.89it/s]
Downloading artifacts:  60%|██████    | 3/5 [00:00<00:00, 5814.65it/s]
Downloading artifacts:  80%|████████  | 4/5 [00:00<00:00, 6116.37it/s]
Downloading artifacts: 100%|██████████| 5/5 [00:00<00:00, 6747.

Metric,training,validation
root_mean_squared_error,4.88902,3.92421
weighted_mean_squared_error,24.7796,17.6921
example_count,8051.0,959.0
max_error,254.516,52.7717
mean_absolute_error,1.65949,1.78055
mean_absolute_percentage_error,84426600000000.0,0.158525
mean_on_target,12.3947,13.1517
mean_squared_error,23.9026,15.3994
r2_score,0.79541,0.870413
score,0.79541,0.870413

Name,Type
tpep_pickup_datetime,datetime
tpep_dropoff_datetime,datetime
trip_distance,double
pickup_zip,integer
dropoff_zip,integer

Name,Type
-,"Tensor('float64', (-1,))"

absolute_error,prediction,fare_amount,tpep_pickup_datetime,tpep_dropoff_datetime,trip_distance,pickup_zip,dropoff_zip
254.5161186756574,5.483881324342602,260.0,2016-02-29 12:16:16,2016-02-29 12:16:53,0.0,8876,8876
212.4896258124134,62.510374187586606,275.0,2016-02-12 20:55:19,2016-02-12 21:52:38,20.85,10013,7008
100.24839858677171,4.751601413228288,105.0,2016-01-16 18:09:15,2016-01-16 18:09:23,0.0,7310,7310
76.2468377838344,76.2468377838344,0.0,2016-01-07 04:07:58,2016-01-07 04:43:08,26.3,10018,10606
71.4838806443745,71.4838806443745,0.0,2016-02-21 03:31:58,2016-02-21 03:32:28,25.0,10502,10502
51.80193381428718,3.1980661857128183,55.0,2016-02-28 04:50:41,2016-02-28 04:52:32,0.18,10115,10027
48.52551272061145,3.474487279388549,52.0,2016-01-10 23:56:29,2016-01-10 23:56:56,0.0,10035,10035
48.10806320594574,3.891936794054257,52.0,2016-01-24 20:57:37,2016-01-24 20:57:52,0.0,10162,10162
47.07005229260545,4.929947707394549,52.0,2016-02-15 14:56:30,2016-02-15 15:52:31,0.0,11422,10171
46.19873319293832,5.801266807061683,52.0,2016-01-07 12:53:17,2016-01-07 12:53:53,0.0,11422,11422

Unnamed: 0,Latest
Model Rank,> 0
root_mean_squared_error,3.92421
weighted_mean_squared_error,17.6921
max_error,52.7717
mean_absolute_error,1.78055
mean_absolute_percentage_error,0.158525
mean_squared_error,15.3994
Run Time,2024-11-17 07:23:44
Run ID,0d70475943384e6c9c04a6fc556644a1


In [9]:
# Run the evaluate step. Its output compares validation and test metrics and
# checks selected metrics against thresholds (the "validated" column).
r.run("evaluate")

2024/11/17 07:23:52 INFO mlflow.recipes.utils.execution: ingest, split, transform, train: No changes. Skipping.


Run MLflow Recipe step: evaluate
2024/11/17 07:23:53 INFO mlflow.recipes.step: Running step evaluate...
2024/11/17 07:23:54 INFO mlflow.models.evaluation.default_evaluator: Computing model predictions.
2024/11/17 07:23:54 INFO mlflow.models.evaluation.default_evaluator: Testing metrics on first row...
2024/11/17 07:23:54 INFO mlflow.models.evaluation.default_evaluator: Computing model predictions.
2024/11/17 07:23:54 INFO mlflow.models.evaluation.default_evaluator: Testing metrics on first row...
2024/11/17 07:23:54 INFO mlflow.tracking._tracking_service.client: 🏃 View run chill-fly-301 at: http://127.0.0.1:5000/#/experiments/775956625588661267/runs/0d70475943384e6c9c04a6fc556644a1.
2024/11/17 07:23:54 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: http://127.0.0.1:5000/#/experiments/775956625588661267.


Metric,validation,test
root_mean_squared_error,3.92421,2.261969
weighted_mean_squared_error,17.6921,3.478776
example_count,959.0,990.0
max_error,52.7717,17.987546
mean_absolute_error,1.78055,1.567069
mean_absolute_percentage_error,0.158525,0.598125
mean_on_target,13.1517,12.15102
mean_squared_error,15.3994,5.116504
r2_score,0.870413,0.947016
score,0.870413,0.947016

metric,greater_is_better,value,threshold,validated
root_mean_squared_error,False,2.26197,10,✅
mean_absolute_error,False,1.56707,50,✅
weighted_mean_squared_error,False,3.47878,50,✅


In [10]:
# Run the register step. If the registered model name already exists, a new
# version is created under it rather than a new registered model.
r.run("register")

2024/11/17 07:23:54 INFO mlflow.recipes.utils.execution: ingest, split, transform, train, evaluate: No changes. Skipping.


Run MLflow Recipe step: register
2024/11/17 07:23:55 INFO mlflow.recipes.step: Running step register...
Registered model 'taxi_fare_regressor' already exists. Creating a new version of this model...
2024/11/17 07:23:55 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: taxi_fare_regressor, version 3
Created version '3' of model 'taxi_fare_regressor'.


In [11]:
# Display the results of the already-executed train step (metrics, schema,
# worst predictions, run summary). No step-execution log lines are emitted
# here, so this appears to read stored results rather than retrain.
r.inspect("train")

Metric,training,validation
root_mean_squared_error,4.88902,3.92421
weighted_mean_squared_error,24.7796,17.6921
example_count,8051.0,959.0
max_error,254.516,52.7717
mean_absolute_error,1.65949,1.78055
mean_absolute_percentage_error,84426600000000.0,0.158525
mean_on_target,12.3947,13.1517
mean_squared_error,23.9026,15.3994
r2_score,0.79541,0.870413
score,0.79541,0.870413

Name,Type
tpep_pickup_datetime,datetime
tpep_dropoff_datetime,datetime
trip_distance,double
pickup_zip,integer
dropoff_zip,integer

Name,Type
-,"Tensor('float64', (-1,))"

absolute_error,prediction,fare_amount,tpep_pickup_datetime,tpep_dropoff_datetime,trip_distance,pickup_zip,dropoff_zip
254.5161186756574,5.483881324342602,260.0,2016-02-29 12:16:16,2016-02-29 12:16:53,0.0,8876,8876
212.4896258124134,62.510374187586606,275.0,2016-02-12 20:55:19,2016-02-12 21:52:38,20.85,10013,7008
100.24839858677171,4.751601413228288,105.0,2016-01-16 18:09:15,2016-01-16 18:09:23,0.0,7310,7310
76.2468377838344,76.2468377838344,0.0,2016-01-07 04:07:58,2016-01-07 04:43:08,26.3,10018,10606
71.4838806443745,71.4838806443745,0.0,2016-02-21 03:31:58,2016-02-21 03:32:28,25.0,10502,10502
51.80193381428718,3.1980661857128183,55.0,2016-02-28 04:50:41,2016-02-28 04:52:32,0.18,10115,10027
48.52551272061145,3.474487279388549,52.0,2016-01-10 23:56:29,2016-01-10 23:56:56,0.0,10035,10035
48.10806320594574,3.891936794054257,52.0,2016-01-24 20:57:37,2016-01-24 20:57:52,0.0,10162,10162
47.07005229260545,4.929947707394549,52.0,2016-02-15 14:56:30,2016-02-15 15:52:31,0.0,11422,10171
46.19873319293832,5.801266807061683,52.0,2016-01-07 12:53:17,2016-01-07 12:53:53,0.0,11422,11422

Unnamed: 0,Latest
Model Rank,> 0
root_mean_squared_error,3.92421
weighted_mean_squared_error,17.6921
max_error,52.7717
mean_absolute_error,1.78055
mean_absolute_percentage_error,0.158525
mean_squared_error,15.3994
Run Time,2024-11-17 07:23:44
Run ID,0d70475943384e6c9c04a6fc556644a1


In [12]:
# Fetch the recipe's "training_data" artifact and summarise every column.
# NOTE(review): the captured summary shows a negative minimum fare_amount,
# which looks like bad source records -- confirm whether they should be
# filtered before training.
training_data = r.get_artifact("training_data")
training_data.describe()

Unnamed: 0,tpep_pickup_datetime,tpep_dropoff_datetime,trip_distance,fare_amount,pickup_zip,dropoff_zip
count,8051,8051,8051.0,8051.0,8051.0,8051.0
mean,2016-02-02 06:05:52.669606,2016-02-02 06:20:46.721898,2.866895,12.394709,10138.588995,10175.038753
min,2016-01-01 00:11:29,2016-01-01 00:16:23,0.0,-8.0,7002.0,7002.0
25%,2016-01-17 14:22:28.500000,2016-01-17 14:37:50.500000,1.0,6.5,10012.0,10013.0
50%,2016-02-04 06:53:36,2016-02-04 07:01:31,1.7,9.0,10022.0,10023.0
75%,2016-02-17 14:32:53,2016-02-17 14:45:28,3.06,14.0,10110.0,10119.0
max,2016-02-29 23:51:06,2016-02-29 23:59:38,30.6,275.0,11436.0,11691.0
std,,,3.483383,10.809536,340.490906,410.985094


In [13]:
# Fetch the recipe's "model" artifact and print its text summary (artifact
# path, flavor and the run ID it was produced by).
trained_model = r.get_artifact("model")
print(trained_model)

mlflow.pyfunc.loaded_model:
  artifact_path: train/model
  flavor: mlflow.pyfunc.model
  run_id: 0d70475943384e6c9c04a6fc556644a1

