## Notebook Objectives: Detail drive of Model Registry, Staging and Model Serving:

In [41]:
import mlflow
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
# import seaborn as sns
import warnings

# Suppress a specific warning by category
warnings.filterwarnings("ignore", category=DeprecationWarning)

from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score,precision_score,recall_score,log_loss, confusion_matrix

In [2]:
mlflow.__version__

'2.6.0'

## S-1: Model Training Life Cycle:

In [3]:
# Data Loading:
def load_data(url):
    # Load dataset
    data = pd.read_csv(filepath_or_buffer=url,sep=',')
    return data

In [4]:
# Data Splitting:
def data_split(final_data,target_column):
    X = final_data.loc[:, final_data.columns != target_column]
    y = final_data.loc[:, final_data.columns == target_column]
    
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3,stratify = y, random_state=47)
    return X_train, X_test, y_train, y_test

In [5]:
# Model for Clasification: Logistic Regression
def training_basic_classifier(X_train,y_train):
    classifier = LogisticRegression()
    classifier.fit(X_train,y_train)
    
    return classifier

In [6]:
def training_rf_classifier(X_train,y_train):
    from sklearn.ensemble import RandomForestClassifier
    model = RandomForestClassifier(n_estimators=101)
    model.fit(X_train, y_train)
    
    return model

In [7]:
# Prediction for Test Data:
def predict_on_test_data(model,X_test):
    y_pred = model.predict(X_test)
    return y_pred

In [8]:
# Prediction Probabilty for Test Data: Optional
def predict_prob_on_test_data(model,X_test):
    y_pred = model.predict_proba(X_test)
    return y_pred

In [9]:
# Metrics creation
def get_metrics(y_true, y_pred, y_pred_prob):
    acc = accuracy_score(y_true, y_pred)
    prec = precision_score(y_true, y_pred,average='micro')
    recall = recall_score(y_true, y_pred,average='micro')
    entropy = log_loss(y_true, y_pred_prob)
    return {'accuracy': round(acc, 2), 'precision': round(prec, 2), 'recall': round(recall, 2), 'entropy': round(entropy, 2)}

In [10]:
# def create_confusion_matrix_plot(y_true, y_pred, labels=None, save_path=None):
#     # Compute the confusion matrix
#     mat = confusion_matrix(y_true, y_pred)

#     # Generate labels for the matrix if not provided
#     if labels is None:
#         labels = range(len(mat))

#     # Create a heatmap plot
#     plt.figure(figsize=(8, 6))
#     sns.set(font_scale=1.2)  # Adjust the font size
#     sns.heatmap(mat, annot=True, fmt='d', cmap='Blues', xticklabels=labels, yticklabels=labels)
#     plt.xlabel('Predicted Labels')
#     plt.ylabel('True Labels')
#     plt.title('Confusion Matrix')

#     # Save the plot if a save_path is provided
#     if save_path:
#         plt.savefig(save_path, bbox_inches='tight')

#     # Display the plot
#     plt.show()

In [11]:
# Confusion Matrix
def create_confusion_matrix_plot(y_true, y_pred):
    mat = confusion_matrix(y_true, y_pred)
    # print(mat)
    # plt.show()
    return mat

## S-2: Performing Model Training

In [12]:
# Data Loading
url = './Iris.csv'
data = load_data(url)
data.head()

Unnamed: 0,SepalLengthCm,SepalWidthCm,PetalLengthCm,PetalWidthCm,Species
0,5.1,3.5,1.4,0.2,0
1,4.9,3.0,1.4,0.2,0
2,4.7,3.2,1.3,0.2,0
3,4.6,3.1,1.5,0.2,0
4,5.0,3.6,1.4,0.2,0


In [13]:
# Train-Test Split
target_column = "Species"
X_train, X_test, y_train, y_test = data_split(data, target_column)

In [14]:
X_test.head()

Unnamed: 0,SepalLengthCm,SepalWidthCm,PetalLengthCm,PetalWidthCm
26,5.0,3.4,1.6,0.4
41,4.5,2.3,1.3,0.3
49,5.0,3.3,1.4,0.2
44,5.1,3.8,1.9,0.4
141,6.9,3.1,5.1,2.3


In [15]:
# Model training
model = training_basic_classifier(X_train,y_train)

  y = column_or_1d(y, warn=True)
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


In [16]:
y_pred = predict_on_test_data(model,X_test)     # Test data prediction
y_pred

array([0, 0, 0, 0, 2, 2, 1, 2, 1, 1, 2, 1, 2, 0, 0, 2, 0, 1, 1, 1, 1, 2,
       2, 0, 0, 2, 1, 0, 1, 2, 0, 2, 0, 0, 0, 0, 2, 2, 2, 1, 1, 2, 2, 1,
       1])

In [17]:
y_pred_prob = predict_prob_on_test_data(model,X_test)       # Test Prediction probability
y_pred_prob

array([[9.60709889e-01, 3.92891122e-02, 9.98779633e-07],
       [9.34702567e-01, 6.52968331e-02, 5.99767945e-07],
       [9.70974210e-01, 2.90255308e-02, 2.58847505e-07],
       [9.54953581e-01, 4.50439371e-02, 2.48163810e-06],
       [1.74813395e-04, 1.07986185e-01, 8.91839001e-01],
       [1.59414983e-05, 8.15768455e-02, 9.18407213e-01],
       [3.57053116e-03, 7.13593488e-01, 2.82835980e-01],
       [4.31196406e-04, 1.90652756e-01, 8.08916047e-01],
       [8.32139704e-03, 8.65320067e-01, 1.26358536e-01],
       [1.95079123e-02, 8.41162257e-01, 1.39329831e-01],
       [2.92676901e-05, 4.04386355e-02, 9.59532097e-01],
       [7.76931170e-03, 8.63228862e-01, 1.29001827e-01],
       [7.45332011e-04, 3.66637758e-01, 6.32616910e-01],
       [9.67661210e-01, 3.23384432e-02, 3.46568517e-07],
       [9.77115276e-01, 2.28845278e-02, 1.96720668e-07],
       [1.69574652e-07, 8.50651690e-03, 9.91493314e-01],
       [9.59223233e-01, 4.07764110e-02, 3.55763420e-07],
       [1.76334516e-03, 6.28534

In [18]:
run_metrics = get_metrics(y_test, y_pred, y_pred_prob)      # Showing the matrics
run_metrics

{'accuracy': 0.93, 'precision': 0.93, 'recall': 0.93, 'entropy': 0.17}

In [19]:
conf_mat = create_confusion_matrix_plot(y_test, y_pred)
conf_mat

array([[15,  0,  0],
       [ 0, 13,  2],
       [ 0,  1, 14]])

In [20]:
type(conf_mat)

numpy.ndarray

In [21]:
# Save the confusion matrix as a file (e.g., a text file)
np.savetxt("./confusion_matrix.txt", conf_mat, fmt='%d', delimiter='\t')

## S-3: Create Experiment for Mlflow Tracking

In [22]:
def create_experiment(experiment_name,run_name,model_name, run_metrics,model, confusion_matrix_path = None, 
                    run_params=None):
    import mlflow
    mlflow.set_tracking_uri("http://localhost:5000") 
    #use above line if you want to use any database like sqlite as backend storage for model else comment this line
    mlflow.set_experiment(experiment_name)
    
    with mlflow.start_run(run_name=run_name):
        
        if not run_params == None:
            for param in run_params:
                mlflow.log_param(param, run_params[param])
            
        for metric in run_metrics:
            mlflow.log_metric(metric, run_metrics[metric])
        
        
        if not confusion_matrix_path == None:
            mlflow.log_artifact(confusion_matrix_path, 'confusion_matrix')
            # Log the confusion matrix file as an artifact
            # mlflow.log_artifact("confusion_matrix.txt", artifact_path="your_artifact_path")
        
        mlflow.set_tag("Model", model_name)
        mlflow.set_tags({"Developers":"Indra-Inc", "Classification Type":"Multiclassification"})
        mlflow.sklearn.log_model(model, "model")
    print('Run - %s is logged to Experiment - %s' %(run_name, experiment_name))

## S-4: Start Mlflow Server
- We will use SQLite as backend so run it on CLI: mlflow server --backend-store-uri sqlite:///mlflow.db --default-artifact-root ./artifacts --host 0.0.0.0 --port 5000 

In [23]:
from datetime import datetime
experiment_name = "Iris_Classifier_"+ str(datetime.now().strftime("%d-%m-%y")) ##basic classifier
run_name="iris_classifier_"+str(datetime.now().strftime("%d-%m-%y"))
model_name = "Logistic_without_Optimization"
create_experiment(experiment_name,run_name, model_name, run_metrics,model,'confusion_matrix.txt')

2023/09/10 23:02:25 INFO mlflow.tracking.fluent: Experiment with name 'Iris_Classifier_10-09-23' does not exist. Creating a new experiment.


Run - iris_classifier_10-09-23 is logged to Experiment - Iris_Classifier_10-09-23


In [24]:
## Now from mlflow artifact section copy paste the code and execute it to check the prediction

logged_model = 'runs:/1dfd4459ba49408fbedf735b884e3608/model'

# Load model as a PyFuncModel.
loaded_model = mlflow.pyfunc.load_model(logged_model)

# Predict on a Pandas DataFrame.
import pandas as pdi
loaded_model.predict(pd.DataFrame(X_test))

array([0, 0, 0, 0, 2, 2, 1, 2, 1, 1, 2, 1, 2, 0, 0, 2, 0, 1, 1, 1, 1, 2,
       2, 0, 0, 2, 1, 0, 1, 2, 0, 2, 0, 0, 0, 0, 2, 2, 2, 1, 1, 2, 2, 1,
       1])

### Model_2: Random Forest Classifier

In [25]:
model_rf = training_rf_classifier(X_train,y_train)
y_pred_rf = predict_on_test_data(model_rf,X_test)
y_pred_prob_rf = predict_prob_on_test_data(model_rf,X_test)
run_metrics_rf = get_metrics(y_test, y_pred_rf, y_pred_prob_rf)
conf_mat_rf = create_confusion_matrix_plot(y_test, y_pred_rf)
np.savetxt("./confusion_matrix_rf.txt", conf_mat_rf, fmt='%d', delimiter='\t')

  return fit_method(estimator, *args, **kwargs)


## S-5: Mlflow Model Registry

### Method-1: mlflow.<model_flavor e.g here sklearn>.log_model()
- ```
    # Log the sklearn model and register as version 1
    mlflow.sklearn.log_model(
        sk_model=model,
        artifact_path="sklearn-model",
        signature=signature,
        registered_model_name="sk-learn-random-forest-reg-model",
    )


In [26]:

def create_exp_and_register_model(experiment_name,run_name,model_name, run_metrics,model, confusion_matrix_path = None, run_params=None):
    mlflow.set_tracking_uri("http://localhost:5000") 
    #use above line if you want to use any database like sqlite as backend storage for model else comment this line
    mlflow.set_experiment(experiment_name)
    with mlflow.start_run(run_name=run_name) as run:
        if not run_params == None:
            for param in run_params:
                mlflow.log_param(param, run_params[param])
            
        for metric in run_metrics:
            mlflow.log_metric(metric, run_metrics[metric])
        
        
        if not confusion_matrix_path == None:
            mlflow.log_artifact(confusion_matrix_path, 'confusion_matrix')
        
        mlflow.set_tag("Model", model_name)
        mlflow.set_tags({"Developers":"Indra-Inc", "Classification Type":"Multiclassification"})
        mlflow.sklearn.log_model(model, "model",registered_model_name="iris-classifier-rf")
    print('Run - %s is logged to Experiment - %s' %(run_name, experiment_name))

In [27]:
# experiment_name = "iris_classifier_rf_method-1_" + str(datetime.now().strftime("%d-%m-%y")) ## random forest classifier
run_name="iris_classifier_rf_method-1_" +str(datetime.now().strftime("%d-%m-%y"))
model_name_rf = "Random_Forest_without_Optimization"
create_exp_and_register_model(experiment_name,run_name, model_name_rf, run_metrics_rf,model_rf,'confusion_matrix_rf.txt')

Successfully registered model 'iris-classifier-rf'.
2023/09/10 23:06:11 INFO mlflow.tracking._model_registry.client: Waiting up to 300 seconds for model version to finish creation. Model name: iris-classifier-rf, version 1


Run - iris_classifier_rf_method-1_10-09-23 is logged to Experiment - Iris_Classifier_10-09-23


Created version '1' of model 'iris-classifier-rf'.


### Method-2: mlflow.register_model()
-  mlflow.register_model() method, after all your experiment runs complete and when you have decided which model is most suitable to add to the registry. For this method, you will need the run_id as part of the runs:URI argument.
- `result = mlflow.register_model(
    "runs:/d16076a3ec534311817565e6527539c0/sklearn-model", "sk-learn-random-forest-reg"
)`

In [28]:
# with mlflow.start_run(run_name=run_name) as run:
result = mlflow.register_model(
        "runs:/1dfd4459ba49408fbedf735b884e3608/model",
        "iris-classifier-lr-1"
    )

Successfully registered model 'iris-classifier-lr-1'.
2023/09/11 00:02:18 INFO mlflow.tracking._model_registry.client: Waiting up to 300 seconds for model version to finish creation. Model name: iris-classifier-lr-1, version 1
Created version '1' of model 'iris-classifier-lr-1'.


## S-6: Fetching Mlflow Model from the Model Registry
- As now we have the models in our Model Registry

In [29]:
import mlflow.pyfunc

In [30]:
# Fetching Specific Model Version: As i am taking first model i.e. Logistic Regression Classifier

model_name = "iris-classifier-lr-1"
model_version = 1

model_reg_lr_1 = mlflow.pyfunc.load_model(model_uri=f"models:/{model_name}/{model_version}")

In [31]:
y_pred = model_reg_lr_1.predict(X_test)
y_pred

array([0, 0, 0, 0, 2, 2, 1, 2, 1, 1, 2, 1, 2, 0, 0, 2, 0, 1, 1, 1, 1, 2,
       2, 0, 0, 2, 1, 0, 1, 2, 0, 2, 0, 0, 0, 0, 2, 2, 2, 1, 1, 2, 2, 1,
       1])

In [None]:
## To see the prediction probability
# sklearn_model = mlflow.sklearn.load_model(
#     model_uri=f"models:/{model_name}/{model_version}"
# )
# y_pred_prob = sklearn_model.predict_proba(X_test)
# print(y_pred_prob)

## S-7: Transition model into Staging area:

In [32]:
from mlflow.tracking import MlflowClient

In [33]:
## Staging the RF model into Stage
client = MlflowClient()
client.transition_model_version_stage(
    name="iris-classifier-rf", version=1, stage="Staging"
)

<ModelVersion: aliases=[], creation_timestamp=1694367371512, current_stage='Staging', description='', last_updated_timestamp=1694373571496, name='iris-classifier-rf', run_id='907b4fded771424ea6403845fad2f492', run_link='', source='/media/indra-inc/ML_Doc/mflow_projects/basic_model_serving_1/artifacts/1/907b4fded771424ea6403845fad2f492/artifacts/model', status='READY', status_message='', tags={}, user_id='', version='1'>

In [34]:
# Staging the LR model into Production
client = MlflowClient()
client.transition_model_version_stage(
    name="iris-classifier-lr-1", version=1, stage="Production"
)

<ModelVersion: aliases=[], creation_timestamp=1694370738140, current_stage='Production', description='', last_updated_timestamp=1694373578156, name='iris-classifier-lr-1', run_id='1dfd4459ba49408fbedf735b884e3608', run_link='', source='/media/indra-inc/ML_Doc/mflow_projects/basic_model_serving_1/artifacts/1/1dfd4459ba49408fbedf735b884e3608/artifacts/model', status='READY', status_message='', tags={}, user_id='', version='1'>

In [35]:
# Fetching Specific Model Version from Production Staging area

model_name = "iris-classifier-lr-1"
stage = "Production"

model_prod_lr_1 = mlflow.pyfunc.load_model(model_uri=f"models:/{model_name}/{stage}")

In [36]:
## Prediction for Batched Data
y_pred = model_prod_lr_1.predict(X_test)
y_pred

array([0, 0, 0, 0, 2, 2, 1, 2, 1, 1, 2, 1, 2, 0, 0, 2, 0, 1, 1, 1, 1, 2,
       2, 0, 0, 2, 1, 0, 1, 2, 0, 2, 0, 0, 0, 0, 2, 2, 2, 1, 1, 2, 2, 1,
       1])

In [39]:
# Prediction for Single data from Staging area for RF model
model_name = "iris-classifier-rf"
stage = 'Staging'

model_stg_rf = mlflow.sklearn.load_model(
    model_uri=f"models:/{model_name}/{stage}"
)


In [42]:
y_pred = model_stg_rf.predict([[6.7,3.3,5.7,2.1]])
y_pred



array([2])

## S-8: Serving Mlflow model from Model Registry
- mlflow.set_tracking_uri('http://localhost:5000')
#### To Set environment variable for the tracking URL where the Model Registry resides
- Run this from CLI `set MLFLOW_TRACKING_URI=http://localhost:5000` for Windows `export MLFLOW_TRACKING_URI=http://localhost:5000 ` for Linux
#### To Serve the production model from the model registry
- mlflow models serve --model-uri models:/iris-classifier-lr-1/Production -p 1234 --no-conda
or
- mlflow models serve -m "models:/iris-classifier-lr-1/Production"

In [44]:
## For Single Data Prediction

# import requests

# inference_request = {
#         "dataframe_records": [[6.7,3.3,5.7,2.1]]
# }

# endpoint = "http://localhost:1234/prediction"

# response = requests.post(endpoint, json=inference_request)

# print(response.text)

In [None]:
## For Batch Prediction
# lst = X_test.values.tolist()
# inference_request = {
#         "dataframe_records": lst
# }

# endpoint = "http://localhost:1234/prediction"

# response = requests.post(endpoint, json=inference_request)

# print(response.text)