# MLFlow 

In [1]:
import mlflow
import numpy as np
import pandas as pd
import joblib
import csv
import json
import os
import sklearn
import mlflow.sklearn
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report,accuracy_score
from sklearn.ensemble import IsolationForest
from datetime import datetime
from mlflow.tracking import MlflowClient
from flask import Flask, flash, request, redirect, url_for
from werkzeug.utils import secure_filename

mlflow.tracking.get_tracking_uri()
exp_name = "evaluate_metric"
mlflow.set_experiment(exp_name)

filedf = "fraud_detector.csv"
df = pd.read_csv(filedf)  

#Ttraining and testing dataset
X = df.drop("Category",axis=1)
y = df.Category
X_train, X_test, y_train, y_test = train_test_split(X,y, test_size=0.3,random_state=40)

#Load model
model = joblib.load(open("model.pkl", 'rb'))
#model= IsolationForest(n_estimators=100, max_samples=len(X_train),random_state=0, verbose=0)   
#model.fit(X_train,y_train)

ypred= model.predict(X_test)

ypred[ypred == 1] = 0 #normal
ypred[ypred == -1] = 1 #possibly fraud

def eval_metrics(actual, pred):
    # compute relevant metrics
    acc_score = accuracy_score(y_test,ypred)
    return acc_score

def load_data(filedf):
    df = pd.read_csv(filedf)  
    X = df.drop("Category",axis=1)
    y = df.Category
    X_train, X_test, y_train, y_test = train_test_split(X,y, test_size=0.3,random_state=42)
    return X_train, y_train, X_test, y_test

def main(n_estimators=40, max_samples=len(X_train)):
    # train a model with given parameters
    np.random.seed(42)

    # Read csv file 
    filedf = "fraud_detector.csv"
    train_x, train_y, test_x, test_y = load_data(filedf)

    # Useful for multiple runs     
    with mlflow.start_run():
        # Load model
        model = joblib.load(open("model.pkl", 'rb'))
                
        ypred[ypred == 1] = 0 #normal
        ypred[ypred == -1] = 1 #possibly fraud
            
        #Freeze Model with joblib
        filename_pkl = 'model.pkl'
        joblib.dump(model, open(filename_pkl, 'wb'))
                
        # Evaluate Metrics
        predicted_qualities = model.predict(X_test)
        (acc_score) = eval_metrics(y_test, predicted_qualities)

        # Print out metrics
        print("evaluate_metric (n_estimators=%f, max_samples=%f):" % (n_estimators, max_samples))
        print("  ACCURACY SCORE: %s" % acc_score)
       
        # Log parameter, metrics, and model to MLflow
        mlflow.log_param(key="n_estimators", value=n_estimators)
        mlflow.log_param(key="max_samples", value=max_samples)
        mlflow.log_metrics({"accuracy score":acc_score})
        mlflow.log_artifact(filedf)
        print("Save to: {}".format(mlflow.get_artifact_uri()))
        
        mlflow.sklearn.log_model(model, "model")

with mlflow.start_run():
     for epoch in range(0, 3):
        mlflow.log_metric(key="quality", value=2*epoch, step=epoch)   
        
main(100,600)
print('\n')
main(110,630)

2022/01/15 16:35:37 INFO mlflow.tracking.fluent: Experiment with name 'evaluate_metric' does not exist. Creating a new experiment.


evaluate_metric (n_estimators=100.000000, max_samples=600.000000):
  ACCURACY SCORE: 0.9766666666666667
Save to: file:///C:/Users/HENNY/Documents/PYTHON/mlflow_project/mlruns/1/f0085241d45647bc857e46e724046884/artifacts


evaluate_metric (n_estimators=110.000000, max_samples=630.000000):
  ACCURACY SCORE: 0.9766666666666667
Save to: file:///C:/Users/HENNY/Documents/PYTHON/mlflow_project/mlruns/1/e31ddbc4503841aeac3aade558bff921/artifacts


In [2]:
from datetime import datetime
from mlflow.tracking import MlflowClient

client = MlflowClient()
experiments = client.list_experiments() # returns a list of mlflow.entities.Experiment
print(experiments)

[<Experiment: artifact_location='file:///C:/Users/HENNY/Documents/PYTHON/mlflow_project/mlruns/0', experiment_id='0', lifecycle_stage='active', name='Default', tags={}>, <Experiment: artifact_location='file:///C:/Users/HENNY/Documents/PYTHON/mlflow_project/mlruns/1', experiment_id='1', lifecycle_stage='active', name='evaluate_metric', tags={}>]


In [3]:
# get the run
_run = client.get_run(run_id="e31ddbc4503841aeac3aade558bff921")
print(_run)

<Run: data=<RunData: metrics={'accuracy score': 0.9766666666666667}, params={'max_samples': '630', 'n_estimators': '110'}, tags={'mlflow.log-model.history': '[{"run_id": "e31ddbc4503841aeac3aade558bff921", '
                             '"artifact_path": "model", "utc_time_created": '
                             '"2022-01-15 09:35:45.038400", "flavors": '
                             '{"python_function": {"model_path": "model.pkl", '
                             '"loader_module": "mlflow.sklearn", '
                             '"python_version": "3.8.8", "env": "conda.yaml"}, '
                             '"sklearn": {"pickled_model": "model.pkl", '
                             '"sklearn_version": "0.23.2", '
                             '"serialization_format": "cloudpickle"}}}]',
 'mlflow.source.name': 'C:\\ProgramData\\Anaconda3\\lib\\site-packages\\ipykernel_launcher.py',
 'mlflow.source.type': 'LOCAL',
 'mlflow.user': 'HENNY'}>, info=<RunInfo: artifact_uri='file:///C:/Users/HEN

In [4]:
# add a tag to the run
dt = datetime.now().strftime("%d-%m-%Y (%H:%M:%S.%f)")
client.set_tag(_run.info.run_id, "deployed", dt)
print(client.set_tag)
print('\n')
print(dt)

<bound method MlflowClient.set_tag of <mlflow.tracking.client.MlflowClient object at 0x0000026BAC227CA0>>


15-01-2022 (16:45:35.943287)


In [5]:
%%writefile main.py

import mlflow
import numpy as np
import pandas as pd
import joblib
import csv
import json
import os
import sklearn
import mlflow.sklearn
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report,accuracy_score
from sklearn.ensemble import IsolationForest
from datetime import datetime
from mlflow.tracking import MlflowClient
from flask import Flask, flash, request, redirect, url_for
from werkzeug.utils import secure_filename

exp_name = "evaluate_metric"
mlflow.set_experiment(exp_name)

filedf = "fraud_detector.csv"
df = pd.read_csv(filedf)  

#Ttraining and testing dataset
X = df.drop("Category",axis=1)
y = df.Category
X_train, X_test, y_train, y_test = train_test_split(X,y, test_size=0.3,random_state=40)

model = joblib.load(open("model.pkl", 'rb'))
#model= IsolationForest(n_estimators=100, max_samples=len(X_train),random_state=0, verbose=0)   
#model.fit(X_train,y_train)

ypred= model.predict(X_test)

ypred[ypred == 1] = 0 #normal
ypred[ypred == -1] = 1 #possibly fraud

def eval_metrics(actual, pred):
    # compute relevant metrics
    acc_score = accuracy_score(y_test,ypred)
    return acc_score

def load_data(filedf):
    df = pd.read_csv(filedf)  
    X = df.drop("Category",axis=1)
    y = df.Category
    X_train, X_test, y_train, y_test = train_test_split(X,y, test_size=0.3,random_state=42)
    return X_train, y_train, X_test, y_test

def main(n_estimators=40, max_samples=len(X_train)):
    # train a model with given parameters
    np.random.seed(42)

    # Read csv file 
    filedf = "fraud_detector.csv"
    train_x, train_y, test_x, test_y = load_data(filedf)

    # Useful for multiple runs     
    with mlflow.start_run():
        # Execute 
        model = joblib.load(open("model.pkl", 'rb'))
        #model= IsolationForest(n_estimators=n_estimators, max_samples=max_samples,random_state=0, verbose=0)   
        #model.fit(X_train,y_train)
        
        ypred[ypred == 1] = 0 #normal
        ypred[ypred == -1] = 1 #possibly fraud
            
        #Freeze Model with joblib
        filename_pkl = 'model.pkl'
        joblib.dump(model, open(filename_pkl, 'wb'))
        print("model.pkl saved")
        
        # Evaluate Metrics
        predicted_qualities = model.predict(X_test)
        (acc_score) = eval_metrics(y_test, predicted_qualities)

        # Print out metrics
        print("evaluate_metric (n_estimators=%f, max_samples=%f):" % (n_estimators, max_samples))
        print("  ACCURACY SCORE: %s" % acc_score)
       
        # Log parameter, metrics, and model to MLflow
        mlflow.log_param(key="n_estimators", value=n_estimators)
        mlflow.log_param(key="max_samples", value=max_samples)
        mlflow.log_metrics({"accuracy score":acc_score})
        mlflow.log_artifact(filedf)
        print("Save to: {}".format(mlflow.get_artifact_uri()))
        
        mlflow.sklearn.log_model(model, "model")

with mlflow.start_run():
     for epoch in range(0, 3):
        mlflow.log_metric(key="quality", value=2*epoch, step=epoch)        
        
main(107, 680)

#if __name__ == "__main__":
#    mlflow ui

Writing main.py


# Automate scheduled training

In [4]:
%%writefile trigger_train.py

def trigger_train(): 
   
    import sklearn
    from sklearn.metrics import confusion_matrix
    import pandas as pd
    from sklearn.model_selection import train_test_split
    from sklearn.metrics import classification_report,accuracy_score
    from sklearn.ensemble import IsolationForest
    import joblib
    import datetime
    import requests
    import warnings
    warnings.filterwarnings('ignore')
       
    filedf = 'fraud_detector.csv'
    df= pd.read_csv(filedf)
    X = df.drop("Category",axis=1)
    y = df.Category
    X_train, X_test, y_train, y_test = train_test_split(X,y, test_size=0.3,random_state=40)

    model= IsolationForest(n_estimators=100, max_samples=len(X_train),random_state=0, verbose=0)   
    model.fit(X_train,y_train)
    #model = joblib.load(open("model.pkl", 'rb'))
   
    ypred= model.predict(X_test)
    ypred[ypred == 1] = 0 #normal
    ypred[ypred == -1] = 1 #possibly fraud 
    
    #Freeze Model with joblib
    filename_pkl = 'model.pkl'
    joblib.dump(model, open(filename_pkl, 'wb'))
    print("model.pkl saved")
    
#Automate scheduled training    
#mlflow.autolog({"run_id":"749eb2eaf2a84e1992110481c7a7a7a9"})  
trigger_train()

import schedule
schedule.every(720).hours.do(trigger_train)

Writing trigger_train.py


#### From terminal type 
(base) PS C:\Users\HENNY\Documents\PYTHON\mlflow_project> mlflow ui

(envi1) (base) PS C:\Users\HENNY\Documents\PYTHON\mlflow_project> mlflow ui
INFO:waitress:Serving on http://127.0.0.1:5000

In [None]:
#schedule.every(720).hours.do(trigger_train)  
#schedule.every(10).seconds.do(trigger_train)
#schedule.every(15).minutes.do(trigger_train)
#schedule.every().day.at('09:01').do(trigger_train)

In [12]:
mlflow.end_run()

In [2]:
%%writefile app.py

import sklearn
import scipy
import numpy as np
import pandas as pd
import csv
from sklearn.metrics import classification_report, accuracy_score
from sklearn.ensemble import IsolationForest
from flask import Flask, flash, request, redirect, url_for
from werkzeug.utils import secure_filename
import json
import joblib

app = Flask(__name__)

@app.route("/status")
def status():
    return "success"

@app.route("/", methods=['GET', 'POST'])
def index():
    A1 = request.args.get("A1", None)
    A2 = request.args.get("A2", None)

    #request_value = request.get_json()

    #A1 = int(request_value["A1"])
    #A2 = int(request_value["A2"])

    if A1 != None:
        y_new = predict(A1, A2)
    else:
        y_new = ""

    write(A1, A2, y_new)
    return (
        """<form action="" method="get">
                A1 input: <input type="text" name="A1">
                A2 input: <input type="text" name="A2">
                <input type="submit" value="A1 & A2 input for Predict Fraud or Not">
            </form>"""

        + "y_new: "
        + str(y_new)
    )

@app.route("/json", methods=['GET', 'POST'])
def jsonify():
    request_value = request.get_json()
    return request_value

def write(A1, A2, y_new):
    filedf = "fraud_detector.csv"
    # write new data into csv
    with open(filedf, 'a', newline='') as f:
        writer = csv.writer(f)
        writer.writerow([A1, A2, y_new])
        print("file written")

def predict(A1, A2):
    """Predict Fraud or Not Fraud."""
    print("predicting")

    model = joblib.load(open("model.pkl", 'rb'))
    X_new = np.array([A1, A2]).reshape(1, -1)
    y_new = model.predict(X_new)

    y_new[y_new == 1] = 0  # normal
    y_new[y_new == -1] = 1  # possibly fraud

    y_new = (int(y_new))
    return y_new

if __name__ == "__main__":
    app.run(host="0.0.0.0", port=int("5000"), debug=True, use_reloader=False)


Overwriting app.py


#### Create image and Docker Container  
(base) PS C:\Users\HENNY\Documents\PYTHON\mlflow_project> pip install --user virtualenv

(base) PS C:\Users\HENNY\Documents\PYTHON\mlflow_project> python -m venv envi1

(base) PS C:\Users\HENNY\Documents\PYTHON\mlflow_project>.\envi1\Scripts\activate

(envi1)(base) PS C:\Users\HENNY\Documents\PYTHON\mlflow_project> pip install -r requirements.txt

(envi1)(base) PS C:\Users\HENNY\Documents\PYTHON\mlflow_project> docker build -t image01 .

(envi1)(base) PS C:\Users\HENNY\Documents\PYTHON\mlflow_project> docker run --name container01 -p 5000:5000 image01
