## MLFLow Basics

In [21]:
import mlflow

In [22]:
# Set the MLflow tracking URI to the local server

mlflow.set_tracking_uri("http://127.0.0.1:5000")

In [95]:
# Create a new experiment

mlflow.set_experiment("Check localhost connection") # Name of the experiment

<Experiment: artifact_location='mlflow-artifacts:/172592810939307593', creation_time=1729358618737, experiment_id='172592810939307593', last_update_time=1729358618737, lifecycle_stage='active', name='Check localhost connection', tags={}>

In [5]:
# Start a new run

with mlflow.start_run(): # Run the code inside this block and log the parameters and metrics
    mlflow.log_metric("test",1) # Log a metric
    mlflow.log_metric("Vipul",2) # Log a metric

2024/10/19 19:26:00 INFO mlflow.tracking._tracking_service.client: 🏃 View run judicious-bee-514 at: http://127.0.0.1:5000/#/experiments/172592810939307593/runs/c6979a77df0347a999ae4914ccdcfc68.
2024/10/19 19:26:00 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: http://127.0.0.1:5000/#/experiments/172592810939307593.


## ML Project iris dataset with MLFLOW

- Installing MLflow.

- Starting a local MLflow Tracking Server.

- Logging and registering a model with MLflow.

- Loading a logged model for inference using MLflow’s pyfunc flavor.

- Viewing the experiment results in the MLflow UI.

In [23]:
import pandas as pd
from sklearn import datasets
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
import mlflow
from mlflow.models import infer_signature

In [24]:
## set the tracking uri
mlflow.set_tracking_uri(uri="http://127.0.0.1:5000")

In [25]:
## load the dataset
X,y=datasets.load_iris(return_X_y=True)

In [26]:
Features=pd.DataFrame(X,columns=['sepal_length','sepal_width','petal_length','petal_width'])

In [27]:
target=pd.DataFrame(y,columns=['species'])

In [41]:
# split the data into training and test sets
X_train,X_test,y_train,y_test=train_test_split(Features,target,test_size=0.20)

In [42]:
X_train

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width
30,4.8,3.1,1.6,0.2
145,6.7,3.0,5.2,2.3
141,6.9,3.1,5.1,2.3
139,6.9,3.1,5.4,2.1
31,5.4,3.4,1.5,0.4
...,...,...,...,...
39,5.1,3.4,1.5,0.2
148,6.2,3.4,5.4,2.3
71,6.1,2.8,4.0,1.3
78,6.0,2.9,4.5,1.5


In [43]:
print(X_train.shape,X_test.shape)

(120, 4) (30, 4)


In [44]:
# Define the model hyperparameters
params = {"penalty":"l2","solver": "lbfgs", "max_iter": 1000, "multi_class": "auto", "random_state": 8888}

##train the model

logistic_regression=LogisticRegression(**params)
logistic_regression.fit(X_train,y_train)

  y = column_or_1d(y, warn=True)


In [45]:
## Prediction on the test set
y_pred=logistic_regression.predict(X_test)
y_pred

array([0, 1, 2, 0, 0, 0, 0, 1, 0, 2, 1, 2, 0, 2, 2, 1, 2, 1, 0, 1, 1, 2,
       0, 1, 0, 1, 1, 2, 2, 2])

In [46]:
accuracy=accuracy_score(y_test,y_pred)
print(accuracy)

0.9666666666666667


## MLFLOW Tracking

In [48]:
### MLFLOW tracking
mlflow.set_tracking_uri(uri="http://127.0.0.1:5000")

##create a new MLFLOW experiment
mlflow.set_experiment("IRIS Dataset experiment")

## Sstart an MLFLOW run
with mlflow.start_run() as run:
    ## log the hyperparameters
    mlflow.log_params(params)

    ## Log the accuracy metrics
    mlflow.log_metric("accuracy",accuracy)

    # Set a tag that we can use to remind ourselves what this run was for
    mlflow.set_tag("Training Info", "Basic Logistic Regression model for iris data")

    ## Infer the model signature
    signature=infer_signature(X_train,logistic_regression.predict(X_train))

    ## log the model
    model_info=mlflow.sklearn.log_model(
        sk_model=logistic_regression,
        artifact_path="iris_model",
        signature=signature,
        input_example=X_train,
        registered_model_name="logistic_regression-1",
    ) 

2024/10/19 21:44:43 INFO mlflow.tracking.fluent: Experiment with name 'IRIS Dataset experiment' does not exist. Creating a new experiment.


Registered model 'logistic_regression-1' already exists. Creating a new version of this model...
2024/10/19 21:44:52 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: logistic_regression-1, version 2
Created version '2' of model 'logistic_regression-1'.
Downloading artifacts: 100%|██████████| 7/7 [00:00<00:00, 21.88it/s]
2024/10/19 21:44:53 INFO mlflow.tracking._tracking_service.client: 🏃 View run casual-owl-311 at: http://127.0.0.1:5000/#/experiments/194413563217996245/runs/e76876363cc7498db16c991415cc874a.
2024/10/19 21:44:53 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: http://127.0.0.1:5000/#/experiments/194413563217996245.


## 2nd Experiment with new Parameters

In [52]:
# Define the model hyperparameters
params = {"solver": "newton-cg", "max_iter": 1000, "multi_class": "auto", "random_state": 1000}

##train the model

logistic_regression_2=LogisticRegression(**params)
logistic_regression_2.fit(X_train,y_train)


  y = column_or_1d(y, warn=True)


In [53]:
y_pred=logistic_regression_2.predict(X_test)
y_pred

array([0, 1, 2, 0, 0, 0, 0, 1, 0, 2, 1, 2, 0, 2, 2, 1, 2, 1, 0, 1, 1, 2,
       0, 1, 0, 1, 1, 2, 2, 2])

In [54]:
accuracy=accuracy_score(y_test,y_pred)
print(accuracy)

0.9666666666666667


In [55]:
## Start an MLFLOW run

with mlflow.start_run() as run:
    ## log the hyperparameters
    mlflow.log_params(params)

    ## Log the accuracy metrics
    mlflow.log_metric("accuracy", 0.9210526315789473)

    # Set a tag that we can use to remind ourselves what this run was for
    mlflow.set_tag("Training Info", "Basic Logistic Regression model for iris data with new parameters")

    ## Infer the model signature

    signature=infer_signature(X_train,logistic_regression_2.predict(X_train))

    ## log the model
    model_info=mlflow.sklearn.log_model(
        sk_model=logistic_regression,
        artifact_path="iris_model",
        signature=signature,
        input_example=X_train,
        registered_model_name="logistic_regression-2",
    ) 

Successfully registered model 'logistic_regression-2'.
2024/10/19 21:51:01 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: logistic_regression-2, version 1
Created version '1' of model 'logistic_regression-2'.
Downloading artifacts: 100%|██████████| 7/7 [00:00<00:00, 15.43it/s]
2024/10/19 21:51:02 INFO mlflow.tracking._tracking_service.client: 🏃 View run worried-ape-603 at: http://127.0.0.1:5000/#/experiments/194413563217996245/runs/75c5734022ea4da7b3a58d8b69bd854e.
2024/10/19 21:51:02 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: http://127.0.0.1:5000/#/experiments/194413563217996245.


In [None]:
model_info.model_uri

'runs:/46ce7d9f08d941bca3d4108aa511ba15/iris_model'