In [2]:
import pandas as pd
from sklearn import datasets
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
import mlflow
from mlflow.models import infer_signature

In [8]:
#load the dataset
X,y=datasets.load_iris(return_X_y=True)

# split data intro train and test sets
X_train,X_test,y_train,y_test=train_test_split(X,y,test_size=0.2)

# Define the model hyperparameters
params={"penalty":"l2","solver":"lbfgs","max_iter":1000,"multi_class":"auto","random_state":423}

In [9]:
# train model
lr=LogisticRegression(**params)
lr.fit(X_train,y_train)



In [10]:
X_test.shape

(30, 4)

In [11]:
## Predictions on the test test
y_pred=lr.predict(X_test)
y_pred

array([1, 0, 0, 1, 1, 1, 2, 2, 0, 1, 1, 2, 1, 1, 0, 1, 0, 0, 0, 1, 2, 2,
       1, 1, 0, 0, 2, 1, 0, 0])

In [12]:
accuracy=accuracy_score(y_test,y_pred)
accuracy

0.9666666666666667

In [13]:
### Mlflow tracking

In [3]:
# set the tracking uri
mlflow.set_tracking_uri(uri='http://127.0.0.1:5000')

In [4]:
# create a new MLflow Experiment
mlflow.set_experiment('MLflow Quickstart - radouane')

<Experiment: artifact_location='mlflow-artifacts:/411432170427355034', creation_time=1731382372921, experiment_id='411432170427355034', last_update_time=1731382372921, lifecycle_stage='active', name='MLflow Quickstart - radouane', tags={}>

In [16]:
# Start an MLflow run context. This groups all the logging, tagging, and model-saving actions together
# and assigns them to a specific run, allowing us to track this particular experiment.
with mlflow.start_run():

    # Log hyperparameters. Here, we log the parameters of the model (stored in `params`) 
    # to keep track of the model's configuration
    # for reproducibility and comparison across different runs.
    mlflow.log_params(params)

    # Log the model's accuracy metric to MLflow. We log accuracy here as an example, 
    # but we could log other metrics as well (e.g., precision, recall, F1-score).
    mlflow.log_metric("accuracy", accuracy) 

    # Set a tag to add extra information or context about this run. Tags can be used
    # to easily filter and identify runs later. Here, the tag "Training Info" describes the run's purpose.
    mlflow.set_tag("Training Info", "Basic LR model for iris data")   # Set a tag that we can use to remind ourselves what this run was for

    # Infer the model signature to record the model’s input and output schema.
    # `infer_signature` takes in an example input (X_train) and the model's predictions (lr.predict(X_train)),
    # then automatically infers the schema (data types and shapes) for both inputs and outputs.
    # This helps other users know the expected format for using this model and can prevent errors 
    # if the model is used with the wrong input types.
    signature = infer_signature(X_train, lr.predict(X_train))

    # Log the model itself in MLflow. This saves the model artifact and links it to this run.
    # We specify several parameters:
    # - `sk_model`: the trained scikit-learn model object (`lr`) we want to save.
    # - `artifact_path`: the directory in MLflow’s artifact storage where the model will be saved.
    # - `signature`: the inferred input/output schema from `infer_signature`, which is saved with the model 
    #                to aid in later predictions.
    # - `input_example`: an example of the model's expected input format (e.g., X_train).
    # - `registered_model_name`: gives a name to the model so that it can be tracked in the MLflow model registry.
    model_info = mlflow.sklearn.log_model(
        sk_model=lr,
        artifact_path="iris_model",
        signature=signature,
        input_example=X_train,
        registered_model_name="tracking-quickstart",
    )


Successfully registered model 'tracking-quickstart'.
2024/11/12 04:33:05 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: tracking-quickstart, version 1
Created version '1' of model 'tracking-quickstart'.
2024/11/12 04:33:06 INFO mlflow.tracking._tracking_service.client: 🏃 View run gentle-rook-466 at: http://127.0.0.1:5000/#/experiments/411432170427355034/runs/9716e4069be548b4a13ab972a5432718.
2024/11/12 04:33:06 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: http://127.0.0.1:5000/#/experiments/411432170427355034.


In [17]:
# model v2
# Define the model hyperparameters
params={"solver":"newton-cg","max_iter":100,"random_state":800}


In [18]:
# train model 2
lr=LogisticRegression(**params)
lr.fit(X_train,y_train)

In [19]:
## Predictions on the test test
y_pred=lr.predict(X_test)
y_pred

array([1, 0, 0, 1, 1, 1, 2, 2, 0, 1, 1, 2, 1, 1, 0, 1, 0, 0, 0, 1, 2, 2,
       1, 1, 0, 0, 2, 1, 0, 0])

In [20]:
accuracy=accuracy_score(y_test,y_pred)
accuracy

0.9666666666666667

In [None]:
# Start an MLflow run context. This groups all the logging, tagging, and model-saving actions together
# and assigns them to a specific run, allowing us to track this particular experiment.
with mlflow.start_run():

    # Log hyperparameters. Here, we log the parameters of the model (stored in `params`) 
    # to keep track of the model's configuration
    # for reproducibility and comparison across different runs.
    mlflow.log_params(params)

    # Log the model's accuracy metric to MLflow. We log accuracy here as an example, 
    # but we could log other metrics as well (e.g., precision, recall, F1-score).
    mlflow.log_metric("accuracy", accuracy) 

    # Set a tag to add extra information or context about this run. Tags can be used
    # to easily filter and identify runs later. Here, the tag "Training Info" describes the run's purpose.
    mlflow.set_tag("Training Info", "Basic LR model for iris data")   # Set a tag that we can use to remind ourselves what this run was for

    # Infer the model signature to record the model’s input and output schema.
    # `infer_signature` takes in an example input (X_train) and the model's predictions (lr.predict(X_train)),
    # then automatically infers the schema (data types and shapes) for both inputs and outputs.
    # This helps other users know the expected format for using this model and can prevent errors 
    # if the model is used with the wrong input types.
    signature = infer_signature(X_train, lr.predict(X_train))

    # Log the model itself in MLflow. This saves the model artifact and links it to this run.
    # We specify several parameters:
    # - `sk_model`: the trained scikit-learn model object (`lr`) we want to save.
    # - `artifact_path`: the directory in MLflow’s artifact storage where the model will be saved.
    # - `signature`: the inferred input/output schema from `infer_signature`, which is saved with the model 
    #                to aid in later predictions.
    # - `input_example`: an example of the model's expected input format (e.g., X_train).
    # - `registered_model_name`: gives a name to the model so that it can be tracked in the MLflow model registry.
    model_info = mlflow.sklearn.log_model(
        sk_model=lr,
        artifact_path="iris_model",
        signature=signature,
        input_example=X_train,
        registered_model_name="tracking-quickstart", # model registry (explained below)
    )


Registered model 'tracking-quickstart' already exists. Creating a new version of this model...
2024/11/12 04:33:22 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: tracking-quickstart, version 2
Created version '2' of model 'tracking-quickstart'.
2024/11/12 04:33:22 INFO mlflow.tracking._tracking_service.client: 🏃 View run calm-ape-576 at: http://127.0.0.1:5000/#/experiments/411432170427355034/runs/1462d943bedd496385ea908918cbfcff.
2024/11/12 04:33:22 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: http://127.0.0.1:5000/#/experiments/411432170427355034.


Inferencing Model Artifacts With MLFlow Inferencing

Validate the model before deployment
* Run the following code to validate model inference works on the example payload, prior to deploying it to a serving endpoint

In [23]:
model_info.model_uri

'runs:/1462d943bedd496385ea908918cbfcff/iris_model'

In [24]:
from mlflow.models import validate_serving_input

model_uri = model_info.model_uri

# The model is logged with an input example. MLflow converts
# it into the serving payload format for the deployed model endpoint,
# and saves it to 'serving_input_payload.json'
serving_payload = """{
  "inputs": [
    [
      4.8,
      3.4,
      1.6,
      0.2
    ],
    [
      5.0,
      3.4,
      1.6,
      0.4
    ],
    [
      6.6,
      3.0,
      4.4,
      1.4
    ],
    [
      5.9,
      3.0,
      5.1,
      1.8
    ],
    [
      6.4,
      3.2,
      4.5,
      1.5
    ],
    [
      5.4,
      3.4,
      1.5,
      0.4
    ],
    [
      6.9,
      3.2,
      5.7,
      2.3
    ],
    [
      4.6,
      3.2,
      1.4,
      0.2
    ],
    [
      5.4,
      3.7,
      1.5,
      0.2
    ],
    [
      6.5,
      3.2,
      5.1,
      2.0
    ],
    [
      7.1,
      3.0,
      5.9,
      2.1
    ],
    [
      4.8,
      3.4,
      1.9,
      0.2
    ],
    [
      5.1,
      3.5,
      1.4,
      0.2
    ],
    [
      6.9,
      3.1,
      4.9,
      1.5
    ],
    [
      6.4,
      2.8,
      5.6,
      2.1
    ],
    [
      6.0,
      2.2,
      4.0,
      1.0
    ],
    [
      6.5,
      3.0,
      5.5,
      1.8
    ],
    [
      5.4,
      3.4,
      1.7,
      0.2
    ],
    [
      7.2,
      3.0,
      5.8,
      1.6
    ],
    [
      7.7,
      3.0,
      6.1,
      2.3
    ],
    [
      5.1,
      3.3,
      1.7,
      0.5
    ],
    [
      4.6,
      3.6,
      1.0,
      0.2
    ],
    [
      5.1,
      3.4,
      1.5,
      0.2
    ],
    [
      5.2,
      2.7,
      3.9,
      1.4
    ],
    [
      6.3,
      2.3,
      4.4,
      1.3
    ],
    [
      6.3,
      2.7,
      4.9,
      1.8
    ],
    [
      6.0,
      3.4,
      4.5,
      1.6
    ],
    [
      5.8,
      2.7,
      5.1,
      1.9
    ],
    [
      4.8,
      3.0,
      1.4,
      0.3
    ],
    [
      6.1,
      2.6,
      5.6,
      1.4
    ],
    [
      5.5,
      2.4,
      3.7,
      1.0
    ],
    [
      4.6,
      3.1,
      1.5,
      0.2
    ],
    [
      6.4,
      3.1,
      5.5,
      1.8
    ],
    [
      6.7,
      3.0,
      5.0,
      1.7
    ],
    [
      5.0,
      2.3,
      3.3,
      1.0
    ],
    [
      6.6,
      2.9,
      4.6,
      1.3
    ],
    [
      5.2,
      3.4,
      1.4,
      0.2
    ],
    [
      6.5,
      3.0,
      5.2,
      2.0
    ],
    [
      5.7,
      2.8,
      4.5,
      1.3
    ],
    [
      5.0,
      3.2,
      1.2,
      0.2
    ],
    [
      5.7,
      3.0,
      4.2,
      1.2
    ],
    [
      6.9,
      3.1,
      5.4,
      2.1
    ],
    [
      6.2,
      2.2,
      4.5,
      1.5
    ],
    [
      5.5,
      2.5,
      4.0,
      1.3
    ],
    [
      5.4,
      3.9,
      1.7,
      0.4
    ],
    [
      5.0,
      3.5,
      1.3,
      0.3
    ],
    [
      6.3,
      2.8,
      5.1,
      1.5
    ],
    [
      5.7,
      2.8,
      4.1,
      1.3
    ],
    [
      5.0,
      3.6,
      1.4,
      0.2
    ],
    [
      6.7,
      2.5,
      5.8,
      1.8
    ],
    [
      6.1,
      2.8,
      4.0,
      1.3
    ],
    [
      4.4,
      2.9,
      1.4,
      0.2
    ],
    [
      5.7,
      3.8,
      1.7,
      0.3
    ],
    [
      6.4,
      2.7,
      5.3,
      1.9
    ],
    [
      5.6,
      2.8,
      4.9,
      2.0
    ],
    [
      5.1,
      3.8,
      1.5,
      0.3
    ],
    [
      7.7,
      3.8,
      6.7,
      2.2
    ],
    [
      5.5,
      4.2,
      1.4,
      0.2
    ],
    [
      7.7,
      2.6,
      6.9,
      2.3
    ],
    [
      6.7,
      3.1,
      5.6,
      2.4
    ],
    [
      5.7,
      2.9,
      4.2,
      1.3
    ],
    [
      7.2,
      3.2,
      6.0,
      1.8
    ],
    [
      6.1,
      2.8,
      4.7,
      1.2
    ],
    [
      4.4,
      3.0,
      1.3,
      0.2
    ],
    [
      4.8,
      3.1,
      1.6,
      0.2
    ],
    [
      5.6,
      3.0,
      4.1,
      1.3
    ],
    [
      4.7,
      3.2,
      1.6,
      0.2
    ],
    [
      6.0,
      3.0,
      4.8,
      1.8
    ],
    [
      7.7,
      2.8,
      6.7,
      2.0
    ],
    [
      6.1,
      3.0,
      4.9,
      1.8
    ],
    [
      6.1,
      3.0,
      4.6,
      1.4
    ],
    [
      5.0,
      3.4,
      1.5,
      0.2
    ],
    [
      5.5,
      3.5,
      1.3,
      0.2
    ],
    [
      6.8,
      2.8,
      4.8,
      1.4
    ],
    [
      5.6,
      2.7,
      4.2,
      1.3
    ],
    [
      5.7,
      2.5,
      5.0,
      2.0
    ],
    [
      6.8,
      3.2,
      5.9,
      2.3
    ],
    [
      5.8,
      2.8,
      5.1,
      2.4
    ],
    [
      6.9,
      3.1,
      5.1,
      2.3
    ],
    [
      6.4,
      2.8,
      5.6,
      2.2
    ],
    [
      6.2,
      2.8,
      4.8,
      1.8
    ],
    [
      6.3,
      2.9,
      5.6,
      1.8
    ],
    [
      5.0,
      2.0,
      3.5,
      1.0
    ],
    [
      7.3,
      2.9,
      6.3,
      1.8
    ],
    [
      4.8,
      3.0,
      1.4,
      0.1
    ],
    [
      7.2,
      3.6,
      6.1,
      2.5
    ],
    [
      4.9,
      2.4,
      3.3,
      1.0
    ],
    [
      7.4,
      2.8,
      6.1,
      1.9
    ],
    [
      5.7,
      2.6,
      3.5,
      1.0
    ],
    [
      5.8,
      2.7,
      3.9,
      1.2
    ],
    [
      5.3,
      3.7,
      1.5,
      0.2
    ],
    [
      4.5,
      2.3,
      1.3,
      0.3
    ],
    [
      6.0,
      2.2,
      5.0,
      1.5
    ],
    [
      5.0,
      3.3,
      1.4,
      0.2
    ],
    [
      7.6,
      3.0,
      6.6,
      2.1
    ],
    [
      5.1,
      3.8,
      1.6,
      0.2
    ],
    [
      6.4,
      2.9,
      4.3,
      1.3
    ],
    [
      7.0,
      3.2,
      4.7,
      1.4
    ],
    [
      6.7,
      3.1,
      4.4,
      1.4
    ],
    [
      4.4,
      3.2,
      1.3,
      0.2
    ],
    [
      5.9,
      3.2,
      4.8,
      1.8
    ],
    [
      4.7,
      3.2,
      1.3,
      0.2
    ],
    [
      6.7,
      3.0,
      5.2,
      2.3
    ],
    [
      6.7,
      3.3,
      5.7,
      2.5
    ],
    [
      7.9,
      3.8,
      6.4,
      2.0
    ],
    [
      5.8,
      4.0,
      1.2,
      0.2
    ],
    [
      6.3,
      2.5,
      4.9,
      1.5
    ],
    [
      5.2,
      3.5,
      1.5,
      0.2
    ],
    [
      6.2,
      3.4,
      5.4,
      2.3
    ],
    [
      6.7,
      3.3,
      5.7,
      2.1
    ],
    [
      4.6,
      3.4,
      1.4,
      0.3
    ],
    [
      5.4,
      3.9,
      1.3,
      0.4
    ],
    [
      5.0,
      3.5,
      1.6,
      0.6
    ],
    [
      6.0,
      2.7,
      5.1,
      1.6
    ],
    [
      5.6,
      2.9,
      3.6,
      1.3
    ],
    [
      5.6,
      2.5,
      3.9,
      1.1
    ],
    [
      5.5,
      2.3,
      4.0,
      1.3
    ],
    [
      6.2,
      2.9,
      4.3,
      1.3
    ],
    [
      5.9,
      3.0,
      4.2,
      1.5
    ],
    [
      6.5,
      3.0,
      5.8,
      2.2
    ]
  ]
}"""

# Validate the serving payload works on the model
validate_serving_input(model_uri, serving_payload)

array([0, 0, 1, 2, 1, 0, 2, 0, 0, 2, 2, 0, 0, 1, 2, 1, 2, 0, 2, 2, 0, 0,
       0, 1, 1, 2, 1, 2, 0, 2, 1, 0, 2, 2, 1, 1, 0, 2, 1, 0, 1, 2, 1, 1,
       0, 0, 2, 1, 0, 2, 1, 0, 0, 2, 2, 0, 2, 0, 2, 2, 1, 2, 1, 0, 0, 1,
       0, 2, 2, 2, 1, 0, 0, 1, 1, 2, 2, 2, 2, 2, 2, 2, 1, 2, 0, 2, 1, 2,
       1, 1, 0, 0, 2, 0, 2, 0, 1, 1, 1, 0, 2, 0, 2, 2, 2, 0, 1, 0, 2, 2,
       0, 0, 0, 2, 1, 1, 1, 1, 1, 2])

* Load the model back for prediction as a generic python function model

In [25]:
loaded_model=mlflow.pyfunc.load_model(model_info.model_uri)
predictions=loaded_model.predict(X_test)

iris_features_name=datasets.load_iris().feature_names

result=pd.DataFrame(X_test,columns=iris_features_name)
result['actual_class']=y_test
result['predicted_class']=predictions

In [26]:
result

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm),actual_class,predicted_class
0,6.7,3.1,4.7,1.5,1,1
1,4.9,3.1,1.5,0.2,0,0
2,5.1,3.5,1.4,0.3,0,0
3,5.1,2.5,3.0,1.1,1,1
4,5.8,2.6,4.0,1.2,1,1
5,6.1,2.9,4.7,1.4,1,1
6,6.3,3.4,5.6,2.4,2,2
7,5.8,2.7,5.1,1.9,2,2
8,4.3,3.0,1.1,0.1,0,0
9,5.6,3.0,4.5,1.5,1,1


In [None]:
# Calcul de l'accuracy
accuracy = (result['actual_class'] == result['predicted_class']).mean()

print(f"Accuracy: {accuracy:.4f}")

Accuracy: 0.9667


### MLFLOW Model Registry Tracking

The MLflow Model Registry component is a centralized model store, set of APIs, and UI, to collaboratively manage the full lifecycle of an MLflow Model. It provides model lineage (which MLflow experiment and run produced the model), model versioning, model aliasing, model tagging, and annotations.

Notes:
* Register After Validation: During the training and validation phase, do not register the model yet. 
* Once the model has been validated and meets the desired performance metrics, it can then be registered in the MLflow model registry.

Model Registration:


* You can register the model automatically by using the registered_model_name="....." parameter in mlflow.sklearn.log_model() after validation.
* Alternatively, you can register it manually in the MLflow UI by promoting the validated model version.

This approach ensures only validated models get registered and made available for inference.

In [5]:
## Inferencing from model from model registry

import mlflow.sklearn
model_name = "tracking-quickstart"
model_version = "latest"

model_uri = f"models:/{model_name}/{model_version}"

model = mlflow.sklearn.load_model(model_uri)
model

  latest = client.get_latest_versions(name, None if stage is None else [stage])


In [6]:
model_uri

'models:/tracking-quickstart/latest'

In [10]:
y_pred_new=model.predict(X_test)
y_pred_new

array([2, 1, 2, 0, 1, 1, 2, 1, 0, 1, 1, 2, 1, 2, 2, 2, 2, 1, 0, 2, 2, 0,
       1, 0, 2, 1, 0, 1, 2, 2])