In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
import pickle
import mlflow

# Load dataset
url = "https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data"
names = ['sepal-length', 'sepal-width', 'petal-length', 'petal-width', 'class']
dataset = pd.read_csv(filepath_or_buffer=url,header=None,sep=',',names=names)
# Split-out validation dataset
array = dataset.values
X = array[:,0:4]
y = array[:,4]
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.20, random_state=1, shuffle=True)

classifier = LogisticRegression()
classifier.fit(X_train,y_train)

test_data = pd.DataFrame(X_test)
test_data.to_csv('data/test_data.csv', index='False')

# save the model to disk
pickle.dump(classifier, open('models/LRClassifier.pkl', 'wb'))

# load the model from disk
loaded_model = pickle.load(open('models/LRClassifier.pkl', 'rb'))

result = loaded_model.score(X_test, y_test)
print(result)



0.9666666666666667


In [2]:
def get_metrics(y_true, y_pred, y_pred_prob):
    from sklearn.metrics import accuracy_score,precision_score,recall_score,log_loss
    acc = accuracy_score(y_true, y_pred)
    prec = precision_score(y_true, y_pred,average='micro')
    recall = recall_score(y_true, y_pred,average='micro')
    entropy = log_loss(y_true, y_pred_prob)
    return {'accuracy': round(acc, 2), 'precision': round(prec, 2), 'recall': round(recall, 2), 'entropy': round(entropy, 2)}

In [9]:
y_pred = loaded_model.predict(X_test)
y_pred_prob = loaded_model.predict_log_proba(X_test)
run_metrics = get_metrics(y_test, y_pred, y_pred_prob)
print(run_metrics)

{'accuracy': 0.97, 'precision': 0.97, 'recall': 0.97, 'entropy': 1.1}


#### Let us utilize DVC and MLFlow for tracking and serving the datasets and models

### DVC

- Tracks datasets and ML projects
- It works with many types of storages (cloud, local, HDFS, HTTP...)
- Runs on top of GIT repository
- Supports building and running pipelines

### MLFlow model registry
- Run the below command from the terminal first before creating experiment and registering it
- `mlflow server --backend-store-uri sqlite:///mlflow.db --default-artifact-root ./artifacts --host 0.0.0.0 --port 5002`

In [10]:
def create_exp_and_register_model(experiment_name,run_name,run_metrics,model,confusion_matrix_path = None, 
                      roc_auc_plot_path = None, run_params=None):
    mlflow.set_tracking_uri("http://localhost:5002") 
    #use above line if you want to use any database like sqlite as backend storage for model else comment this line
    mlflow.set_experiment(experiment_name)
    with mlflow.start_run(run_name=run_name) as run:
        if not run_params == None:
            for param in run_params:
                mlflow.log_param(param, run_params[param])
            
        for metric in run_metrics:
            mlflow.log_metric(metric, run_metrics[metric])
        
        if not confusion_matrix_path == None:
            mlflow.log_artifact(confusion_matrix_path, 'confusion_materix')
            
        if not roc_auc_plot_path == None:
            mlflow.log_artifact(roc_auc_plot_path, "roc_auc_plot")
        
        mlflow.set_tag("tag1", "Basic_Classifier")
        mlflow.set_tags({"tag2":"Iris_dataset_classification", "tag3":"Production"})
        mlflow.sklearn.log_model(model, "model",registered_model_name="Iris-classifier")

In [11]:
# Naming the experiments for MLflow Tuned model
from datetime import datetime
experiment_name = "Iris_Classifier" + str(datetime.now().strftime("%d-%m-%y")) ##Tuned classifier
run_name="Iris_Classifier_Basic_Model" +str(datetime.now().strftime("%d-%m-%y"))
create_exp_and_register_model(experiment_name,run_name,run_metrics,loaded_model)

2023/01/18 07:13:22 INFO mlflow.tracking.fluent: Experiment with name 'Iris_Classifier18-01-23' does not exist. Creating a new experiment.
Successfully registered model 'Iris-classifier'.
2023/01/18 07:13:28 INFO mlflow.tracking._model_registry.client: Waiting up to 300 seconds for model version to finish creation.                     Model name: Iris-classifier, version 1
Created version '1' of model 'Iris-classifier'.


### Transitioning MLFlow model to production stage

In [12]:
client = mlflow.tracking.MlflowClient()
client.transition_model_version_stage(
    name="Iris-classifier",
    version=1,
    stage="Production"
)

<ModelVersion: creation_timestamp=1674006208679, current_stage='Production', description='', last_updated_timestamp=1674006307755, name='Iris-classifier', run_id='0c87200df2f844f8aa079ad25515ef99', run_link='', source='./artifacts/1/0c87200df2f844f8aa079ad25515ef99/artifacts/model', status='READY', status_message='', tags={}, user_id='', version='1'>

In [13]:
# Predicting the test data with the Transistioned model

import mlflow.pyfunc

model_name = "Iris-classifier"
stage = 'Production'

model = mlflow.pyfunc.load_model(
    model_uri=f"models:/{model_name}/{stage}"
)

y_pred = model.predict(X_test)
print(y_pred)

['Iris-setosa' 'Iris-versicolor' 'Iris-versicolor' 'Iris-setosa'
 'Iris-virginica' 'Iris-versicolor' 'Iris-virginica' 'Iris-setosa'
 'Iris-setosa' 'Iris-virginica' 'Iris-versicolor' 'Iris-setosa'
 'Iris-virginica' 'Iris-versicolor' 'Iris-versicolor' 'Iris-setosa'
 'Iris-versicolor' 'Iris-versicolor' 'Iris-setosa' 'Iris-setosa'
 'Iris-versicolor' 'Iris-versicolor' 'Iris-virginica' 'Iris-setosa'
 'Iris-virginica' 'Iris-versicolor' 'Iris-setosa' 'Iris-setosa'
 'Iris-versicolor' 'Iris-virginica']


### MLFlow model serving

**Run this from command line**
-  use `set MLFLOW_TRACKING_URI=http://localhost:5002` for windows
- use `export MLFLOW_TRACKING_URI=http://localhost:5002` if in linux/mac

## **Now run this command from command line**

make sure we use to write the different port - other than the one you used while starting mlflow server

`mlflow models serve --model-uri models:/Iris-classifier/Production -p 6001 --env-manager=local`

In [15]:
# Predicting the results using endpiont created by serving the model

import requests
import pandas as pd
X_test = pd.read_csv('data/test_data.csv')
lst = X_test.values.tolist()
inference_request = {
        "data": lst
}
endpoint = "http://localhost:6001/invocations"
response = requests.post(endpoint, json=inference_request)
print(response)

<Response [200]>


In [16]:
print(response.text)

["Iris-setosa", "Iris-versicolor", "Iris-versicolor", "Iris-setosa", "Iris-virginica", "Iris-versicolor", "Iris-virginica", "Iris-setosa", "Iris-setosa", "Iris-virginica", "Iris-versicolor", "Iris-setosa", "Iris-virginica", "Iris-versicolor", "Iris-versicolor", "Iris-setosa", "Iris-versicolor", "Iris-versicolor", "Iris-setosa", "Iris-setosa", "Iris-versicolor", "Iris-versicolor", "Iris-virginica", "Iris-setosa", "Iris-virginica", "Iris-versicolor", "Iris-setosa", "Iris-setosa", "Iris-versicolor", "Iris-virginica"]
