## Getting started with ML Project MLflow 
* Installing ML FLow

* Starting a local ML flow tracking server

* Logging and registering a model with mlflow

* Loading a logged model for inferance using MLflow's pyfunc flavor

* viewing the experiment results with ML flow UI


In [6]:
import pandas as pd
from sklearn import datasets
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
import mlflow
from mlflow.models import infer_signature

In [7]:
##set the tracking uri
mlflow.set_tracking_uri("http://127.0.0.1:5000")

In [8]:
##load the dataset
datasets.load_iris()
X,y=datasets.load_iris(return_X_y=True)

In [9]:
X

array([[5.1, 3.5, 1.4, 0.2],
       [4.9, 3. , 1.4, 0.2],
       [4.7, 3.2, 1.3, 0.2],
       [4.6, 3.1, 1.5, 0.2],
       [5. , 3.6, 1.4, 0.2],
       [5.4, 3.9, 1.7, 0.4],
       [4.6, 3.4, 1.4, 0.3],
       [5. , 3.4, 1.5, 0.2],
       [4.4, 2.9, 1.4, 0.2],
       [4.9, 3.1, 1.5, 0.1],
       [5.4, 3.7, 1.5, 0.2],
       [4.8, 3.4, 1.6, 0.2],
       [4.8, 3. , 1.4, 0.1],
       [4.3, 3. , 1.1, 0.1],
       [5.8, 4. , 1.2, 0.2],
       [5.7, 4.4, 1.5, 0.4],
       [5.4, 3.9, 1.3, 0.4],
       [5.1, 3.5, 1.4, 0.3],
       [5.7, 3.8, 1.7, 0.3],
       [5.1, 3.8, 1.5, 0.3],
       [5.4, 3.4, 1.7, 0.2],
       [5.1, 3.7, 1.5, 0.4],
       [4.6, 3.6, 1. , 0.2],
       [5.1, 3.3, 1.7, 0.5],
       [4.8, 3.4, 1.9, 0.2],
       [5. , 3. , 1.6, 0.2],
       [5. , 3.4, 1.6, 0.4],
       [5.2, 3.5, 1.5, 0.2],
       [5.2, 3.4, 1.4, 0.2],
       [4.7, 3.2, 1.6, 0.2],
       [4.8, 3.1, 1.6, 0.2],
       [5.4, 3.4, 1.5, 0.4],
       [5.2, 4.1, 1.5, 0.1],
       [5.5, 4.2, 1.4, 0.2],
       [4.9, 3

In [10]:
y

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2])

In [11]:
#split the data in to training and test sets
X_train,X_test,y_train,y_test=train_test_split(X,y,test_size=0.20)

# define the hyperparameters
params = {
    "penalty": "l2",  
    "solver": "lbfgs",  # or 'saga' (lbfgs doesn't work with L1)
    "max_iter": 1000,  
    "multi_class": "auto",  
    "random_state": 8888  
}

In [12]:
X_train,X_test,y_train,y_test

(array([[6.1, 3. , 4.6, 1.4],
        [6.1, 3. , 4.9, 1.8],
        [4.4, 3. , 1.3, 0.2],
        [5.7, 3. , 4.2, 1.2],
        [5.5, 2.4, 3.7, 1. ],
        [6.4, 3.1, 5.5, 1.8],
        [7.7, 3. , 6.1, 2.3],
        [5.8, 2.7, 5.1, 1.9],
        [5.4, 3.7, 1.5, 0.2],
        [6.3, 2.9, 5.6, 1.8],
        [5.5, 3.5, 1.3, 0.2],
        [5.8, 2.7, 4.1, 1. ],
        [5.9, 3. , 4.2, 1.5],
        [6.7, 3.3, 5.7, 2.1],
        [5. , 3.3, 1.4, 0.2],
        [6. , 2.7, 5.1, 1.6],
        [7.7, 2.6, 6.9, 2.3],
        [5.6, 2.7, 4.2, 1.3],
        [6.2, 2.8, 4.8, 1.8],
        [7.7, 3.8, 6.7, 2.2],
        [4.7, 3.2, 1.3, 0.2],
        [6.4, 2.8, 5.6, 2.2],
        [6.2, 2.9, 4.3, 1.3],
        [5.4, 3.4, 1.5, 0.4],
        [4.8, 3. , 1.4, 0.1],
        [4.6, 3.1, 1.5, 0.2],
        [6.7, 2.5, 5.8, 1.8],
        [4.4, 2.9, 1.4, 0.2],
        [6.8, 3. , 5.5, 2.1],
        [6.6, 3. , 4.4, 1.4],
        [6.3, 2.5, 5. , 1.9],
        [6.4, 2.7, 5.3, 1.9],
        [6.1, 2.8, 4.7, 1.2],
        [6

In [14]:
# train the model
lr= LogisticRegression(**params)
lr.fit(X_train,y_train)



In [15]:
#Prediction on the test set
y_pred=lr.predict(X_test)
y_pred


array([0, 2, 0, 0, 1, 0, 1, 1, 0, 1, 1, 2, 2, 0, 1, 2, 0, 2, 2, 0, 2, 2,
       1, 1, 1, 2, 2, 1, 2, 2])

In [16]:
X_test

array([[5. , 3.2, 1.2, 0.2],
       [5.8, 2.8, 5.1, 2.4],
       [5.3, 3.7, 1.5, 0.2],
       [5. , 3. , 1.6, 0.2],
       [6.8, 2.8, 4.8, 1.4],
       [4.9, 3.1, 1.5, 0.2],
       [5.4, 3. , 4.5, 1.5],
       [6.6, 2.9, 4.6, 1.3],
       [5.7, 3.8, 1.7, 0.3],
       [5.6, 3. , 4.5, 1.5],
       [4.9, 2.4, 3.3, 1. ],
       [7.2, 3.2, 6. , 1.8],
       [6.3, 3.3, 6. , 2.5],
       [5. , 3.5, 1.6, 0.6],
       [5.5, 2.6, 4.4, 1.2],
       [6.9, 3.1, 5.4, 2.1],
       [5. , 3.6, 1.4, 0.2],
       [5.8, 2.7, 5.1, 1.9],
       [7.7, 2.8, 6.7, 2. ],
       [4.8, 3.1, 1.6, 0.2],
       [6.1, 2.6, 5.6, 1.4],
       [5.9, 3. , 5.1, 1.8],
       [6.4, 3.2, 4.5, 1.5],
       [5.6, 2.9, 3.6, 1.3],
       [6.1, 2.9, 4.7, 1.4],
       [6.5, 3. , 5.2, 2. ],
       [6.4, 2.8, 5.6, 2.1],
       [6.1, 2.8, 4. , 1.3],
       [6.5, 3. , 5.5, 1.8],
       [6.7, 3.3, 5.7, 2.5]])

In [18]:
accuracy=accuracy_score(y_test,y_pred)
accuracy
print(accuracy)

1.0


In [20]:
##ML flow tracking

import mlflow
import mlflow.sklearn
from mlflow.models.signature import infer_signature

mlflow.set_tracking_uri(uri="http://127.0.0.1:5000")

#create a mlflow experiment
mlflow.set_experiment("MLFOW Quickstart")

## start a mlflow run
with mlflow.start_run():
    ##log the hyperparatmeters
    mlflow.log_params(params)

    ## log the accuracy metrics
    mlflow.log_metric("accuracy",accuracy)

    # set a tag that we can use to remind ourselves what this run was for 
    mlflow.set_tag("Training Info","Basic LR model for this data")

    # infer the model signature
    signature=infer_signature(X_train,lr.predict(X_train))

    # log the model
    model_info=mlflow.sklearn.log_model(
        sk_model=lr,
        artifact_path="iris_model",
        signature=signature,
        input_example=X_train,
        registered_model_name="tracking-quickstart"

    )

2025/06/04 08:39:03 INFO mlflow.tracking.fluent: Experiment with name 'MLFOW Quickstart' does not exist. Creating a new experiment.


Successfully registered model 'tracking-quickstart'.
2025/06/04 08:39:06 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: tracking-quickstart, version 1


🏃 View run glamorous-grub-444 at: http://127.0.0.1:5000/#/experiments/776543985139464445/runs/7fe6a3c75d8042cdbcc7baf941576cac
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/776543985139464445


Created version '1' of model 'tracking-quickstart'.


In [23]:
# 2ND MODEL 
#split the data in to training and test sets
X_train,X_test,y_train,y_test=train_test_split(X,y,test_size=0.20)

# define the hyperparameters
params = {
     
    "solver": "newton-cg",  # or 'saga' (lbfgs doesn't work with L1)
    "max_iter": 1000,  
    "multi_class": "auto",  
    "random_state": 1000  
}

In [24]:
# train the model
lr= LogisticRegression(**params)
lr.fit(X_train,y_train)



In [25]:
#Prediction on the test set
y_pred=lr.predict(X_test)
y_pred


array([2, 2, 1, 1, 0, 2, 2, 2, 2, 0, 0, 0, 1, 1, 1, 0, 1, 1, 2, 0, 2, 0,
       2, 1, 2, 0, 0, 0, 0, 2])

In [26]:
accuracy=accuracy_score(y_test,y_pred)
accuracy
print(accuracy)

0.9666666666666667


In [28]:
## start a mlflow run
with mlflow.start_run():
    ##log the hyperparatmeters
    mlflow.log_params(params)

    ## log the accuracy metrics
    mlflow.log_metric("accuracy",accuracy)

    # set a tag that we can use to remind ourselves what this run was for 
    mlflow.set_tag("Training Info","Basic LR model for this data")

    # infer the model signature
    signature=infer_signature(X_train,lr.predict(X_train))

    # log the model
    model_info=mlflow.sklearn.log_model(
        sk_model=lr,
        artifact_path="iris_model",
        signature=signature,
        input_example=X_train,
        registered_model_name="tracking-quickstart"
    )

Registered model 'tracking-quickstart' already exists. Creating a new version of this model...
2025/06/04 08:48:07 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: tracking-quickstart, version 2


🏃 View run nervous-shrimp-540 at: http://127.0.0.1:5000/#/experiments/776543985139464445/runs/2a59d8d9031d43698333bd4010249710
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/776543985139464445


Created version '2' of model 'tracking-quickstart'.


In [31]:
model_info.model_uri

'runs:/2a59d8d9031d43698333bd4010249710/iris_model'

## Inferencing and validating model

In [30]:
import mlflow
from mlflow.models import Model

model_uri = 'runs:/2a59d8d9031d43698333bd4010249710/iris_model'
# The model is logged with an input example
pyfunc_model = mlflow.pyfunc.load_model(model_uri)
input_data = pyfunc_model.input_example

# Verify the model with the provided input data using the logged dependencies.
# For more details, refer to:
# https://mlflow.org/docs/latest/models.html#validate-models-before-deployment
mlflow.models.predict(
    model_uri=model_uri,
    input_data=input_data,
    env_manager="conda",
)

2025/06/04 09:09:44 INFO mlflow.models.python_api: It is highly recommended to use `uv` as the environment manager for predicting with MLflow models as its performance is significantly better than other environment managers. Run `pip install uv` to install uv. See https://docs.astral.sh/uv/getting-started/installation for other installation methods.
2025/06/04 09:09:44 INFO mlflow.models.flavor_backend_registry: Selected backend for flavor 'python_function'
2025/06/04 09:09:45 INFO mlflow.utils.conda: === Creating conda environment /tmp/tmp92_xyhdf/envs/conda_envs/mlflow-1b1f98e3dddcb826057b81f356202bd1a9fcddf9-e76b024813fe80cf452bb00bb428206c50966e0d ===


  conda config --add channels defaults

For more information see https://docs.conda.io/projects/conda/en/stable/user-guide/configuration/use-condarc.html

  deprecated.topic(


Channels:
 - conda-forge
 - defaults
Platform: linux-64
Collecting package metadata (repodata.json): ...working... done
Solving environment: ...working... done

python-3.10.16       | 23.9 MB   |            |   0% 
tk-8.6.13            | 3.1 MB    |            |   0% [A

openssl-3.5.0        | 3.0 MB    |            |   0% [A[A


pip-25.1             | 1.2 MB    |            |   0% [A[A[A



libsqlite-3.50.0     | 897 KB    |            |   0% [A[A[A[A




ncurses-6.5          | 871 KB    |            |   0% [A[A[A[A[A





libgcc-15.1.0        | 810 KB    |            |   0% [A[A[A[A[A[A






setuptools-80.9.0    | 731 KB    |            |   0% [A[A[A[A[A[A[A







ld_impl_linux-64-2.4 | 656 KB    |            |   0% [A[A[A[A[A[A[A[A








libgomp-15.1.0       | 442 KB    |            |   0% [A[A[A[A[A[A[A[A[A









readline-8.2         | 276 KB    |            |   0% [A[A[A[A[A[A[A[A[A[A










bzip2-1.0.8          | 24



    current version: 25.1.1
    latest version: 25.5.0

Please update conda by running

    $ conda update -n base -c defaults conda








libsqlite-3.50.0     | 897 KB    | ########## | 100% [A[A[A[A
tk-8.6.13            | 3.1 MB    | ########## | 100% [A

openssl-3.5.0        | 3.0 MB    |            |   1% [A[A




ncurses-6.5          | 871 KB    | 1          |   2% [A[A[A[A[A


pip-25.1             | 1.2 MB    | 1          |   1% [A[A[A





libgcc-15.1.0        | 810 KB    | 1          |   2% [A[A[A[A[A[A




python-3.10.16       | 23.9 MB   | ##         |  20% [A[A[A[A[A





libgcc-15.1.0        | 810 KB    | ########## | 100% [A[A[A[A[A[A






setuptools-80.9.0    | 731 KB    | 2          |   2% [A[A[A[A[A[A[A

openssl-3.5.0        | 3.0 MB    | #######8   |  78% [A[A







ld_impl_linux-64-2.4 | 656 KB    | 2          |   2% [A[A[A[A[A[A[A[A



libsqlite-3.50.0     | 897 KB    | ########## | 100% [A[A[A[A


pip-25.1             | 1.2 MB    | ########## | 100% [A[A[A


pip-25.1             | 1.2 MB    | ########## | 100% [A[A[A






setuptools-80.9

2025/06/04 09:10:55 INFO mlflow.utils.environment: === Running command '['bash', '-c', 'source /opt/conda/bin/../etc/profile.d/conda.sh && conda activate mlflow-1b1f98e3dddcb826057b81f356202bd1a9fcddf9-e76b024813fe80cf452bb00bb428206c50966e0d 1>&2 && python -c ""']'


/ Ran pip subprocess with arguments:
['/tmp/tmp92_xyhdf/envs/conda_envs/mlflow-1b1f98e3dddcb826057b81f356202bd1a9fcddf9-e76b024813fe80cf452bb00bb428206c50966e0d/bin/python', '-m', 'pip', 'install', '-U', '-r', '/tmp/tmpr47ajp_d/iris_model/condaenv.4d7eecuq.requirements.txt', '--exists-action=b']
Pip subprocess output:
Collecting mlflow==2.22.0 (from -r /tmp/tmpr47ajp_d/iris_model/condaenv.4d7eecuq.requirements.txt (line 1))
  Downloading mlflow-2.22.0-py3-none-any.whl.metadata (30 kB)
Collecting cloudpickle==3.1.1 (from -r /tmp/tmpr47ajp_d/iris_model/condaenv.4d7eecuq.requirements.txt (line 2))
  Downloading cloudpickle-3.1.1-py3-none-any.whl.metadata (7.1 kB)
Collecting numpy==2.2.6 (from -r /tmp/tmpr47ajp_d/iris_model/condaenv.4d7eecuq.requirements.txt (line 3))
  Downloading numpy-2.2.6-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (62 kB)
Collecting pandas==2.2.3 (from -r /tmp/tmpr47ajp_d/iris_model/condaenv.4d7eecuq.requirements.txt (line 4))
  Downloading pa

2025/06/04 09:10:55 INFO mlflow.utils.environment: === Running command '['bash', '-c', 'source /opt/conda/bin/../etc/profile.d/conda.sh && conda activate mlflow-1b1f98e3dddcb826057b81f356202bd1a9fcddf9-e76b024813fe80cf452bb00bb428206c50966e0d 1>&2 && python /opt/conda/envs/testenv/lib/python3.10/site-packages/mlflow/pyfunc/_mlflow_pyfunc_backend_predict.py --model-uri file:///tmp/tmpr47ajp_d/iris_model --content-type json --input-path /tmp/tmp6vi4ewkd/input.json']'


{"predictions": [2, 2, 1, 1, 0, 0, 0, 0, 2, 1, 1, 0, 2, 0, 1, 2, 2, 0, 2, 1, 1, 2, 2, 0, 0, 2, 2, 1, 1, 1, 1, 1, 2, 2, 2, 2, 0, 0, 1, 0, 0, 0, 2, 2, 0, 1, 0, 1, 2, 1, 0, 1, 2, 0, 1, 1, 0, 1, 2, 2, 2, 1, 0, 0, 1, 2, 1, 2, 1, 2, 0, 2, 0, 0, 2, 2, 1, 2, 1, 2, 0, 1, 1, 0, 2, 2, 2, 1, 2, 1, 0, 0, 0, 2, 0, 1, 0, 0, 1, 1, 0, 2, 1, 1, 2, 2, 1, 0, 0, 0, 0, 1, 0, 1, 1, 1, 2, 2, 0, 2]}

# Load the model back for prediction as a generic python fucntion model

In [32]:
loaded_model=mlflow.pyfunc.load_model(model_info.model_uri)
predictions=loaded_model.predict(X_test)

iris_features_name=datasets.load_iris().feature_names

result=pd.DataFrame(X_test,columns=iris_features_name)

result["acutual_class"]=y_test
result["predicted_class"]=predictions

In [33]:
result

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm),acutual_class,predicted_class
0,7.1,3.0,5.9,2.1,2,2
1,6.4,3.2,5.3,2.3,2,2
2,5.0,2.0,3.5,1.0,1,1
3,6.9,3.1,4.9,1.5,1,1
4,5.1,3.4,1.5,0.2,0,0
5,6.2,3.4,5.4,2.3,2,2
6,6.1,2.6,5.6,1.4,2,2
7,6.9,3.2,5.7,2.3,2,2
8,6.8,3.2,5.9,2.3,2,2
9,5.1,3.8,1.6,0.2,0,0


In [34]:
result[:5]

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm),acutual_class,predicted_class
0,7.1,3.0,5.9,2.1,2,2
1,6.4,3.2,5.3,2.3,2,2
2,5.0,2.0,3.5,1.0,1,1
3,6.9,3.1,4.9,1.5,1,1
4,5.1,3.4,1.5,0.2,0,0


# Model Registry

MLflow Model Registry: Overview
The MLflow Model Registry is a centralized hub for managing ML models throughout their lifecycle—from development to staging and production. Set of api's and UI

It provides:model lineage , model aliasing, model tagging , and annotations

Version Control – Track different versions of models.

Stage Management – Move models between Staging, Production, and Archived.

Annotations – Add descriptions, tags, or metadata.

Collaboration – Teams can review, approve, or reject model transitions.

In [35]:
## start a mlflow run
with mlflow.start_run():
    ##log the hyperparatmeters
    mlflow.log_params(params)

    ## log the accuracy metrics
    mlflow.log_metric("accuracy",1.0)

    # set a tag that we can use to remind ourselves what this run was for 
    mlflow.set_tag("Training Info2","Basic LR model for this data")

    # infer the model signature
    signature=infer_signature(X_train,lr.predict(X_train))

    # log the model
    model_info=mlflow.sklearn.log_model(
        sk_model=lr,
        artifact_path="iris_model",
        signature=signature,
        input_example=X_train,
        # registered_model_name="tracking-quickstart" without registry 
    )

🏃 View run bemused-carp-771 at: http://127.0.0.1:5000/#/experiments/776543985139464445/runs/3f64287ac3434d98a409c2d92a6d3801
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/776543985139464445


## inferencing form the models from model registry

In [37]:
import mlflow.sklearn

model_name="tracking-quickstart"
model_version="latest"

model_uri=f"models:/{model_name}/{model_version}"

model=mlflow.sklearn.load_model(model_uri)
model


In [38]:
model_uri

'models:/tracking-quickstart/latest'

In [39]:
y_pred_new=model.predict(X_test)
y_pred_new

array([2, 2, 1, 1, 0, 2, 2, 2, 2, 0, 0, 0, 1, 1, 1, 0, 1, 1, 2, 0, 2, 0,
       2, 1, 2, 0, 0, 0, 0, 2])