In [24]:
# Load IPython's autoreload extension so edits to imported modules are picked up
# without restarting the kernel. Re-running this cell in a live kernel only prints
# an "already loaded" notice.
%load_ext autoreload
# Mode 2: reload modules automatically before each cell executes.
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [25]:
import pandas as pd

# MLflow Classification Recipe Notebook

This notebook runs the MLflow Classification Recipe on Databricks and inspects its results. For more information about the MLflow Classification Recipe, including usage examples, see the [Classification Recipe overview documentation](https://mlflow.org/docs/latest/recipes.html#classification-recipe) and the [Classification Recipe API documentation](https://mlflow.org/docs/latest/python_api/mlflow.recipes.html#module-mlflow.recipes.classification.v1.recipe).

In [26]:
from mlflow.recipes import Recipe

# Create the recipe object using the "local" profile; every later cell drives the
# recipe through this `r` handle.
# NOTE(review): the profile presumably maps to a profiles/local.yaml file in the
# recipe project — confirm against the repository layout.
r = Recipe(profile="local")

2024/12/05 09:51:38 INFO mlflow.recipes.recipe: Creating MLflow Recipe 'mlflow-classification-recipe-learning' with profile: 'local'


In [27]:
# Inspect the recipe as a whole (no step argument is passed). No output is captured
# for this cell in this export.
# NOTE(review): presumably renders a visual overview of the recipe's steps — confirm
# against the MLflow Recipes documentation.
r.inspect()

In [28]:
# Run the "ingest" step. The output below shows the ingested dataset's column schema
# and a preview of its first rows.
r.run("ingest")

2024/12/05 09:51:39 INFO mlflow.recipes.step: Running step ingest...

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]
Downloading artifacts: 100%|██████████| 1/1 [00:00<00:00, 17697.49it/s]
Downloading artifacts: 100%|██████████| 1/1 [00:00<00:00, 6307.22it/s] 


name,type
Pregnant,integer
Glucose,number
Diastolic_BP,number
Skin_Fold,number
Serum_Insulin,number
BMI,number
Diabetes_Pedigree,number
Age,integer
Class,integer

Pregnant,Glucose,Diastolic_BP,Skin_Fold,Serum_Insulin,BMI,Diabetes_Pedigree,Age,Class
1,89.0,66.0,23.0,94.0,28.1,0.167,21,0
0,137.0,40.0,35.0,168.0,43.1,2.288,33,1
3,78.0,50.0,32.0,88.0,31.0,0.248,26,1
2,197.0,70.0,45.0,543.0,30.5,0.158,53,1
1,189.0,60.0,23.0,846.0,30.1,0.398,59,1


In [29]:
# Run the "split" step. As the log shows, the already-executed "ingest" step is
# skipped because nothing changed.
r.run("split")

2024/12/05 09:51:40 INFO mlflow.recipes.utils.execution: ingest: No changes. Skipping.


Run MLflow Recipe step: split
2024/12/05 09:51:41 INFO mlflow.recipes.step: Running step split...
  return bound(*args, **kwds)


In [30]:
# Fetch the "training_data" artifact from the recipe.
training_data = r.get_artifact("training_data")
# Show summary statistics (count, mean, std, quartiles, min/max) for each column as
# the cell's output.
training_data.describe()

Unnamed: 0,Pregnant,Glucose,Diastolic_BP,Skin_Fold,Serum_Insulin,BMI,Diabetes_Pedigree,Age,Class
count,306.0,306.0,306.0,306.0,306.0,306.0,306.0,306.0,306.0
mean,3.173203,121.653595,71.133987,29.179739,150.169935,33.171569,0.526255,30.679739,0.326797
std,3.092996,31.151732,12.524796,10.57884,114.774921,7.151197,0.361407,10.130074,0.469811
min,0.0,56.0,24.0,7.0,14.0,18.2,0.085,21.0,0.0
25%,1.0,98.25,64.0,21.0,74.25,28.4,0.273,23.0,0.0
50%,2.0,117.5,70.0,29.0,120.0,33.2,0.4435,27.0,0.0
75%,5.0,141.75,80.0,36.0,183.75,36.8,0.681,35.0,1.0
max,15.0,198.0,110.0,63.0,744.0,67.1,2.42,81.0,1.0


In [31]:
# Run the "transform" step; unchanged upstream steps (ingest, split) are skipped.
# The output lists the column schema twice and a preview of rows.
# NOTE(review): the two identical schema tables are presumably the step's input and
# output schemas — confirm against the rendered step card.
r.run("transform")

2024/12/05 09:51:42 INFO mlflow.recipes.utils.execution: ingest, split: No changes. Skipping.


Run MLflow Recipe step: transform
2024/12/05 09:51:43 INFO mlflow.recipes.step: Running step transform...


Name,Type
Pregnant,int64
Glucose,float64
Diastolic_BP,float64
Skin_Fold,float64
Serum_Insulin,float64
BMI,float64
Diabetes_Pedigree,float64
Age,int64
Class,int64

Name,Type
Pregnant,int64
Glucose,float64
Diastolic_BP,float64
Skin_Fold,float64
Serum_Insulin,float64
BMI,float64
Diabetes_Pedigree,float64
Age,int64
Class,int64

Pregnant,Glucose,Diastolic_BP,Skin_Fold,Serum_Insulin,BMI,Diabetes_Pedigree,Age,Class
1,89.0,66.0,23.0,94.0,28.1,0.167,21,0
1,103.0,30.0,38.0,83.0,43.3,0.183,33,0
3,126.0,88.0,41.0,235.0,39.3,0.704,27,0
1,97.0,66.0,15.0,140.0,23.2,0.487,22,0
3,88.0,58.0,11.0,54.0,24.8,0.267,22,0


In [32]:
# Run the "train" step; unchanged upstream steps (ingest, split, transform) are
# skipped. The output reports training vs. validation metrics, the model's input
# and output schemas, and the rows with the largest absolute prediction error.
r.run("train")

2024/12/05 09:51:44 INFO mlflow.recipes.utils.execution: ingest, split, transform: No changes. Skipping.


Run MLflow Recipe step: train
2024/12/05 09:51:45 INFO mlflow.recipes.step: Running step train...
2024/12/05 09:51:45 INFO mlflow.recipes.utils.tracking: Experiment with name 'Custom_Function_Stacking' does not exist. Creating a new experiment.
2024/12/05 09:51:45 INFO mlflow.recipes.steps.train: Training data has less than 5000 rows, skipping rebalancing.

Downloading artifacts:   0%|          | 0/5 [00:00<?, ?it/s]
Downloading artifacts:  20%|██        | 1/5 [00:00<00:00, 9597.95it/s]
Downloading artifacts:  40%|████      | 2/5 [00:00<00:00, 9927.35it/s]
Downloading artifacts:  60%|██████    | 3/5 [00:00<00:00, 11554.56it/s]
Downloading artifacts:  80%|████████  | 4/5 [00:00<00:00, 13066.37it/s]
Downloading artifacts: 100%|██████████| 5/5 [00:00<00:00, 14334.60it/s]
Downloading artifacts: 100%|██████████| 5/5 [00:00<00:00, 11821.60it/s]

Downloading artifacts:   0%|          | 0/5 [00:00<?, ?it/s]
Downloading artifacts:  20%|██        | 1/5 [00:00<00:00, 20867.18it/s]
Downloading art

Metric,training,validation
f1_score,0.591716,0.727273
accuracy_score,0.77451,0.842105
example_count,306.0,38.0
false_negatives,50.0,3.0
false_positives,19.0,3.0
log_loss,0.454753,0.569121
precision_recall_auc,0.716452,0.631071
precision_score,0.724638,0.727273
recall_score,0.5,0.727273
roc_auc,0.888592,0.828283

Name,Type
Pregnant,long
Glucose,double
Diastolic_BP,double
Skin_Fold,double
Serum_Insulin,double
BMI,double
Diabetes_Pedigree,double
Age,long

Name,Type
predicted_score_0,float
predicted_score_1,float
predicted_score,float
predicted_label,long

absolute_error,prediction,Class,Pregnant,Glucose,Diastolic_BP,Skin_Fold,Serum_Insulin,BMI,Diabetes_Pedigree,Age
0.9763625264167786,0,1,2,155.0,74.0,17.0,96.0,26.6,0.433,27
0.9721060991287231,0,1,3,130.0,78.0,23.0,79.0,28.4,0.323,34
0.9664666652679444,0,1,14,100.0,78.0,25.0,184.0,36.6,0.412,46
0.9583752751350404,0,1,1,122.0,90.0,51.0,220.0,49.7,0.325,31
0.9494691491127014,0,1,4,109.0,64.0,44.0,99.0,34.8,0.905,26
0.9491817951202391,0,1,2,146.0,70.0,38.0,360.0,28.0,0.337,29
0.931069552898407,0,1,3,129.0,64.0,29.0,115.0,26.4,0.219,28
0.9238065481185912,0,1,6,119.0,50.0,22.0,176.0,27.1,1.318,33
0.9222877025604248,1,0,8,126.0,88.0,36.0,108.0,38.5,0.349,49
0.9082192182540894,0,1,0,138.0,60.0,35.0,167.0,34.6,0.534,21

Unnamed: 0,Latest
Model Rank,> 0
f1_score,0.727273
accuracy_score,0.842105
false_negatives,3
false_positives,3
log_loss,0.569121
precision_score,0.727273
recall_score,0.727273
roc_auc,0.828283
true_negatives,24


In [33]:
# Fetch the "model" artifact produced by the recipe.
trained_model = r.get_artifact("model")
# Print the model's text summary (artifact path, flavor, and run id in the output below).
print(trained_model)

mlflow.pyfunc.loaded_model:
  artifact_path: train/model
  flavor: mlflow.pyfunc.model
  run_id: c583e1b6c8a34478be2a7ac7bde3f60a



In [34]:
# Run the "evaluate" step; unchanged upstream steps are skipped. The output compares
# validation and test metrics and then checks the model validation criteria.
# In the run captured here the f1_score criterion is NOT met (0.722222 against a
# threshold of 0.99).
# NOTE(review): the threshold is presumably set in the recipe configuration, not in
# this notebook — confirm whether 0.99 is intentional.
r.run("evaluate")

2024/12/05 09:52:01 INFO mlflow.recipes.utils.execution: ingest, split, transform, train: No changes. Skipping.


Run MLflow Recipe step: evaluate
2024/12/05 09:52:02 INFO mlflow.recipes.step: Running step evaluate...
2024/12/05 09:52:04 INFO mlflow.models.evaluation.default_evaluator: Computing model predictions.
2024/12/05 09:52:04 INFO mlflow.models.evaluation.default_evaluator: The evaluation dataset is inferred as binary dataset, positive label is 1, negative label is 0.
2024/12/05 09:52:04 INFO mlflow.models.evaluation.default_evaluator: Testing metrics on first row...
2024/12/05 09:52:05 INFO mlflow.models.evaluation.default_evaluator: Computing model predictions.
2024/12/05 09:52:05 INFO mlflow.models.evaluation.default_evaluator: The evaluation dataset is inferred as binary dataset, positive label is 1, negative label is 0.
2024/12/05 09:52:05 INFO mlflow.models.evaluation.default_evaluator: Testing metrics on first row...


Metric,validation,test
f1_score,0.727273,0.722222
accuracy_score,0.842105,0.791667
example_count,38.0,48.0
false_negatives,3.0,6.0
false_positives,3.0,4.0
log_loss,0.569121,0.55572
precision_recall_auc,0.631071,0.806678
precision_score,0.727273,0.764706
recall_score,0.727273,0.684211
roc_auc,0.828283,0.822142

metric,greater_is_better,value,threshold,validated
f1_score,True,0.722222,0.99,❌
