In [None]:
https://github.com/hennypurwadi/mlflow_mlop

# Automate scheduled training

In [4]:
#%%writefile trigger_train.py

def trigger_train(): 
   
    import sklearn
    from sklearn.metrics import confusion_matrix
    import pandas as pd
    from sklearn.model_selection import train_test_split
    from sklearn.metrics import classification_report,accuracy_score
    from sklearn.ensemble import IsolationForest
    import joblib
    import datetime
    import requests
    import warnings
    warnings.filterwarnings('ignore')
       
    filedf = 'fraud_detector.csv'
    df= pd.read_csv(filedf)
    X = df.drop("Category",axis=1)
    y = df.Category
    X_train, X_test, y_train, y_test = train_test_split(X,y, test_size=0.3,random_state=40)

    model= IsolationForest(n_estimators=100, max_samples=len(X_train),random_state=0, verbose=0)   
    model.fit(X_train,y_train)
    #model = joblib.load(open("model.pkl", 'rb'))
   
    ypred= model.predict(X_test)
    ypred[ypred == 1] = 0 #normal
    ypred[ypred == -1] = 1 #possibly fraud 
    
    #Freeze Model with joblib
    filename_pkl = 'model.pkl'
    joblib.dump(model, open(filename_pkl, 'wb'))
    print("model.pkl saved")
    
#Automate scheduled training    
#mlflow.autolog({"run_id":"749eb2eaf2a84e1992110481c7a7a7a9"})  
trigger_train()

import schedule
schedule.every(720).hours.do(trigger_train)

model.pkl saved


Every 720 hours do trigger_train() (last run: [never], next run: 2022-02-14 22:31:01)

# MLFlow 

In [1]:
import mlflow
import numpy as np
import pandas as pd
import joblib
import csv
import json
import os
import sklearn
import mlflow.sklearn
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report,accuracy_score
from sklearn.ensemble import IsolationForest
from datetime import datetime
from mlflow.tracking import MlflowClient
from flask import Flask, flash, request, redirect, url_for
from werkzeug.utils import secure_filename

mlflow.tracking.get_tracking_uri()
exp_name = "evaluate_metric"
mlflow.set_experiment(exp_name)

filedf = "fraud_detector.csv"
df = pd.read_csv(filedf)  

#Ttraining and testing dataset
X = df.drop("Category",axis=1)
y = df.Category
X_train, X_test, y_train, y_test = train_test_split(X,y, test_size=0.3,random_state=40)

#Load model
model = joblib.load(open("model.pkl", 'rb'))
#model= IsolationForest(n_estimators=100, max_samples=len(X_train),random_state=0, verbose=0)   
#model.fit(X_train,y_train)

ypred= model.predict(X_test)

ypred[ypred == 1] = 0 #normal
ypred[ypred == -1] = 1 #possibly fraud

def eval_metrics(actual, pred):
    # compute relevant metrics
    acc_score = accuracy_score(y_test,ypred)
    return acc_score

def load_data(filedf):
    df = pd.read_csv(filedf)  
    X = df.drop("Category",axis=1)
    y = df.Category
    X_train, X_test, y_train, y_test = train_test_split(X,y, test_size=0.3,random_state=42)
    return X_train, y_train, X_test, y_test

def main(n_estimators=40, max_samples=len(X_train)):
    # train a model with given parameters
    np.random.seed(42)

    # Read csv file 
    filedf = "fraud_detector.csv"
    train_x, train_y, test_x, test_y = load_data(filedf)

    # Useful for multiple runs     
    with mlflow.start_run():
        # Load model
        model = joblib.load(open("model.pkl", 'rb'))
                
        ypred[ypred == 1] = 0 #normal
        ypred[ypred == -1] = 1 #possibly fraud
            
        #Freeze Model with joblib
        filename_pkl = 'model.pkl'
        joblib.dump(model, open(filename_pkl, 'wb'))
                
        # Evaluate Metrics
        predicted_qualities = model.predict(X_test)
        (acc_score) = eval_metrics(y_test, predicted_qualities)

        # Print out metrics
        print("evaluate_metric (n_estimators=%f, max_samples=%f):" % (n_estimators, max_samples))
        print("  ACCURACY SCORE: %s" % acc_score)
       
        # Log parameter, metrics, and model to MLflow
        mlflow.log_param(key="n_estimators", value=n_estimators)
        mlflow.log_param(key="max_samples", value=max_samples)
        mlflow.log_metrics({"accuracy score":acc_score})
        mlflow.log_artifact(filedf)
        print("Save to: {}".format(mlflow.get_artifact_uri()))
        
        mlflow.sklearn.log_model(model, "model")

with mlflow.start_run():
     for epoch in range(0, 3):
        mlflow.log_metric(key="quality", value=2*epoch, step=epoch)   
        
main(100,600)
print('\n')
main(110,630)

2022/01/15 16:35:37 INFO mlflow.tracking.fluent: Experiment with name 'evaluate_metric' does not exist. Creating a new experiment.


evaluate_metric (n_estimators=100.000000, max_samples=600.000000):
  ACCURACY SCORE: 0.9766666666666667
Save to: file:///C:/Users/HENNY/Documents/PYTHON/mlflow_project/mlruns/1/f0085241d45647bc857e46e724046884/artifacts


evaluate_metric (n_estimators=110.000000, max_samples=630.000000):
  ACCURACY SCORE: 0.9766666666666667
Save to: file:///C:/Users/HENNY/Documents/PYTHON/mlflow_project/mlruns/1/e31ddbc4503841aeac3aade558bff921/artifacts


In [2]:
from datetime import datetime
from mlflow.tracking import MlflowClient

client = MlflowClient()
experiments = client.list_experiments() # returns a list of mlflow.entities.Experiment
print(experiments)

[<Experiment: artifact_location='file:///C:/Users/HENNY/Documents/PYTHON/mlflow_project/mlruns/0', experiment_id='0', lifecycle_stage='active', name='Default', tags={}>, <Experiment: artifact_location='file:///C:/Users/HENNY/Documents/PYTHON/mlflow_project/mlruns/1', experiment_id='1', lifecycle_stage='active', name='evaluate_metric', tags={}>]


In [3]:
# get the run
_run = client.get_run(run_id="e31ddbc4503841aeac3aade558bff921")
print(_run)

<Run: data=<RunData: metrics={'accuracy score': 0.9766666666666667}, params={'max_samples': '630', 'n_estimators': '110'}, tags={'mlflow.log-model.history': '[{"run_id": "e31ddbc4503841aeac3aade558bff921", '
                             '"artifact_path": "model", "utc_time_created": '
                             '"2022-01-15 09:35:45.038400", "flavors": '
                             '{"python_function": {"model_path": "model.pkl", '
                             '"loader_module": "mlflow.sklearn", '
                             '"python_version": "3.8.8", "env": "conda.yaml"}, '
                             '"sklearn": {"pickled_model": "model.pkl", '
                             '"sklearn_version": "0.23.2", '
                             '"serialization_format": "cloudpickle"}}}]',
 'mlflow.source.name': 'C:\\ProgramData\\Anaconda3\\lib\\site-packages\\ipykernel_launcher.py',
 'mlflow.source.type': 'LOCAL',
 'mlflow.user': 'HENNY'}>, info=<RunInfo: artifact_uri='file:///C:/Users/HEN

In [4]:
# add a tag to the run
dt = datetime.now().strftime("%d-%m-%Y (%H:%M:%S.%f)")
client.set_tag(_run.info.run_id, "deployed", dt)
print(client.set_tag)
print('\n')
print(dt)

<bound method MlflowClient.set_tag of <mlflow.tracking.client.MlflowClient object at 0x0000026BAC227CA0>>


15-01-2022 (16:45:35.943287)


In [12]:
#%%writefile trigger_train.py

def trigger_train(): 
   
    import sklearn
    from sklearn.metrics import confusion_matrix
    import pandas as pd
    from sklearn.model_selection import train_test_split
    from sklearn.metrics import classification_report,accuracy_score
    from sklearn.ensemble import IsolationForest
    import joblib
    import datetime
    import requests
    import warnings
    warnings.filterwarnings('ignore')
       
    filedf = 'fraud_detector.csv'
    df= pd.read_csv(filedf)
    X = df.drop("Category",axis=1)
    y = df.Category
    X_train, X_test, y_train, y_test = train_test_split(X,y, test_size=0.3,random_state=40)

    model= IsolationForest(n_estimators=100, max_samples=len(X_train),random_state=0, verbose=0)   
    model.fit(X_train,y_train)
    #model = joblib.load(open("model.pkl", 'rb'))
   
    ypred= model.predict(X_test)
    ypred[ypred == 1] = 0 #normal
    ypred[ypred == -1] = 1 #possibly fraud 
    
    #Freeze Model with joblib
    filename_pkl = 'model.pkl'
    joblib.dump(model, open(filename_pkl, 'wb'))
    print("model.pkl saved")
    
#Automate scheduled training mlflow Run_id    
mlflow.autolog({"run_id":"e31ddbc4503841aeac3aade558bff921"})  
trigger_train()

import schedule
schedule.every(720).hours.do(trigger_train)

2022/01/16 13:31:49 INFO mlflow.tracking.fluent: Autologging successfully enabled for sklearn.
2022/01/16 13:31:49 INFO mlflow.utils.autologging_utils: Created MLflow autologging run with ID '7613e491506842e2aca36b471acaeb48', which will track hyperparameters, performance metrics, model artifacts, and lineage information for the current sklearn workflow


model.pkl saved


Every 720 hours do trigger_train() (last run: [never], next run: 2022-02-15 13:31:54)

In [5]:
%%writefile main.py

import mlflow
import numpy as np
import pandas as pd
import joblib
import csv
import json
import os
import sklearn
import mlflow.sklearn
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report,accuracy_score
from sklearn.ensemble import IsolationForest
from datetime import datetime
from mlflow.tracking import MlflowClient
from flask import Flask, flash, request, redirect, url_for
from werkzeug.utils import secure_filename

exp_name = "evaluate_metric"
mlflow.set_experiment(exp_name)

filedf = "fraud_detector.csv"
df = pd.read_csv(filedf)  

#Ttraining and testing dataset
X = df.drop("Category",axis=1)
y = df.Category
X_train, X_test, y_train, y_test = train_test_split(X,y, test_size=0.3,random_state=40)

model = joblib.load(open("model.pkl", 'rb'))
#model= IsolationForest(n_estimators=100, max_samples=len(X_train),random_state=0, verbose=0)   
#model.fit(X_train,y_train)

ypred= model.predict(X_test)

ypred[ypred == 1] = 0 #normal
ypred[ypred == -1] = 1 #possibly fraud

def eval_metrics(actual, pred):
    # compute relevant metrics
    acc_score = accuracy_score(y_test,ypred)
    return acc_score

def load_data(filedf):
    df = pd.read_csv(filedf)  
    X = df.drop("Category",axis=1)
    y = df.Category
    X_train, X_test, y_train, y_test = train_test_split(X,y, test_size=0.3,random_state=42)
    return X_train, y_train, X_test, y_test

def main(n_estimators=40, max_samples=len(X_train)):
    # train a model with given parameters
    np.random.seed(42)

    # Read csv file 
    filedf = "fraud_detector.csv"
    train_x, train_y, test_x, test_y = load_data(filedf)

    # Useful for multiple runs     
    with mlflow.start_run():
        # Execute 
        model = joblib.load(open("model.pkl", 'rb'))
        #model= IsolationForest(n_estimators=n_estimators, max_samples=max_samples,random_state=0, verbose=0)   
        #model.fit(X_train,y_train)
        
        ypred[ypred == 1] = 0 #normal
        ypred[ypred == -1] = 1 #possibly fraud
            
        #Freeze Model with joblib
        filename_pkl = 'model.pkl'
        joblib.dump(model, open(filename_pkl, 'wb'))
        print("model.pkl saved")
        
        # Evaluate Metrics
        predicted_qualities = model.predict(X_test)
        (acc_score) = eval_metrics(y_test, predicted_qualities)

        # Print out metrics
        print("evaluate_metric (n_estimators=%f, max_samples=%f):" % (n_estimators, max_samples))
        print("  ACCURACY SCORE: %s" % acc_score)
       
        # Log parameter, metrics, and model to MLflow
        mlflow.log_param(key="n_estimators", value=n_estimators)
        mlflow.log_param(key="max_samples", value=max_samples)
        mlflow.log_metrics({"accuracy score":acc_score})
        mlflow.log_artifact(filedf)
        print("Save to: {}".format(mlflow.get_artifact_uri()))
        
        mlflow.sklearn.log_model(model, "model")

with mlflow.start_run():
     for epoch in range(0, 3):
        mlflow.log_metric(key="quality", value=2*epoch, step=epoch)        
        
main(107, 680)

#if __name__ == "__main__":
#    mlflow ui

Writing main.py


#### From terminal type 
(base) PS C:\Users\HENNY\Documents\PYTHON\mlflow_project> mlflow ui

(envi1) (base) PS C:\Users\HENNY\Documents\PYTHON\mlflow_project> mlflow ui
INFO:waitress:Serving on http://127.0.0.1:5000

In [None]:
#schedule.every(720).hours.do(trigger_train)  
#schedule.every(10).seconds.do(trigger_train)
#schedule.every(15).minutes.do(trigger_train)
#schedule.every().day.at('09:01').do(trigger_train)

In [12]:
mlflow.end_run()

In [2]:
%%writefile app.py

import sklearn
import scipy
import numpy as np
import pandas as pd
import csv
from sklearn.metrics import classification_report, accuracy_score
from sklearn.ensemble import IsolationForest
from flask import Flask, flash, request, redirect, url_for
from werkzeug.utils import secure_filename
import json
import joblib

app = Flask(__name__)

@app.route("/status")
def status():
    return "success"

@app.route("/", methods=['GET', 'POST'])
def index():
    A1 = request.args.get("A1", None)
    A2 = request.args.get("A2", None)

    #request_value = request.get_json()

    #A1 = int(request_value["A1"])
    #A2 = int(request_value["A2"])

    if A1 != None:
        y_new = predict(A1, A2)
    else:
        y_new = ""

    write(A1, A2, y_new)
    return (
        """<form action="" method="get">
                A1 input: <input type="text" name="A1">
                A2 input: <input type="text" name="A2">
                <input type="submit" value="A1 & A2 input for Predict Fraud or Not">
            </form>"""

        + "y_new: "
        + str(y_new)
    )

@app.route("/json", methods=['GET', 'POST'])
def jsonify():
    request_value = request.get_json()
    return request_value

def write(A1, A2, y_new):
    filedf = "fraud_detector.csv"
    # write new data into csv
    with open(filedf, 'a', newline='') as f:
        writer = csv.writer(f)
        writer.writerow([A1, A2, y_new])
        print("file written")

def predict(A1, A2):
    """Predict Fraud or Not Fraud."""
    print("predicting")

    model = joblib.load(open("model.pkl", 'rb'))
    X_new = np.array([A1, A2]).reshape(1, -1)
    y_new = model.predict(X_new)

    y_new[y_new == 1] = 0  # normal
    y_new[y_new == -1] = 1  # possibly fraud

    y_new = (int(y_new))
    return y_new

if __name__ == "__main__":
    app.run(host="0.0.0.0", port=int("5000"), debug=True, use_reloader=False)


Overwriting app.py


#### Create image and Docker Container  
(base) PS C:\Users\HENNY\Documents\PYTHON\mlflow_project> pip install --user virtualenv

(base) PS C:\Users\HENNY\Documents\PYTHON\mlflow_project> python -m venv envi1

(base) PS C:\Users\HENNY\Documents\PYTHON\mlflow_project>.\envi1\Scripts\activate

(envi1)(base) PS C:\Users\HENNY\Documents\PYTHON\mlflow_project> pip install -r requirements.txt

(envi1)(base) PS C:\Users\HENNY\Documents\PYTHON\mlflow_project> docker build -t image01 .

(envi1)(base) PS C:\Users\HENNY\Documents\PYTHON\mlflow_project> docker run --name container01 -p 5000:5000 image01


# AzureML

In [2]:
import mlflow
import mlflow.sklearn
import azure
import azureml.core
from datetime import datetime
from mlflow.tracking import MlflowClient

from azureml.core import Workspace
from azureml.core.model import Model
from azureml.core import Experiment
from azureml.core.webservice import Webservice
from azureml.core.image import ContainerImage
from azureml.core.webservice import AciWebservice
from azureml.core.conda_dependencies import CondaDependencies

import mlflow.azureml
from azureml.core import Workspace
from azureml.core.model import InferenceConfig
from azureml.core.webservice import AciWebservice, Webservice

from azureml.core import Experiment
from azureml.core import Environment
from azureml.core.environment import Environment
from azureml.core.experiment import Experiment
from azureml.core import Environment
from azureml.core import ScriptRunConfig

AZ_SUBSCRIPTION_ID='b667c281-58f0-47a7-899b-63ea6cd8b7e8' #azure-subscription-id
ws = Workspace.create(name='ws1',subscription_id=AZ_SUBSCRIPTION_ID, 
                      resource_group='res1',create_resource_group=True,location='southeastasia')

Deploying KeyVault with name ws1keyvault5baf6f8ab8d64.
Deploying StorageAccount with name ws1storage4dbfefc102d446.
Deploying AppInsights with name ws1insightsde9d189167484.
Deployed AppInsights with name ws1insightsde9d189167484. Took 4.48 seconds.
Deploying Workspace with name ws1.
Deployed KeyVault with name ws1keyvault5baf6f8ab8d64. Took 22.98 seconds.
Deployed StorageAccount with name ws1storage4dbfefc102d446. Took 29.27 seconds.
Deployed Workspace with name ws1. Took 38.88 seconds.


In [5]:
#%%writefile score.py

import numpy as np
import pandas as pd
import joblib
import csv
import json
import os
import sklearn
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report,accuracy_score
from sklearn.ensemble import IsolationForest
from datetime import datetime
from mlflow.tracking import MlflowClient
from flask import Flask, flash, request, redirect, url_for
from werkzeug.utils import secure_filename

import os
import mlflow
import mlflow.sklearn
import azure
import azureml.core
from datetime import datetime
from mlflow.tracking import MlflowClient

from azureml.core import Workspace
from azureml.core.model import Model

from azureml.core import Experiment
from azureml.core.webservice import Webservice
from azureml.core.image import ContainerImage
from azureml.core.webservice import AciWebservice
from azureml.core.conda_dependencies import CondaDependencies

import mlflow.azureml
from azureml.core import Workspace
from azureml.core.model import InferenceConfig
from azureml.core.webservice import AciWebservice, Webservice

from azureml.core import Experiment
from azureml.core import Environment
from azureml.core.environment import Environment
from azureml.core.experiment import Experiment
from azureml.core import Environment
from azureml.core import ScriptRunConfig
from azureml.core.runconfig import RunConfiguration

mlflow.end_run() #Close previous Run
ws.write_config(path=".", file_name="ws_config.json")
mlflow.set_tracking_uri(ws.get_mlflow_tracking_uri())

#mlflow.create_experiment(exp_name)
exp_nameB = "eval_metric"
mlflow.set_experiment(exp_nameB)
#mlflow_run = mlflow.start_run()
my_uri = ws.get_mlflow_tracking_uri()

filedf = "fraud_detector.csv"
df = pd.read_csv(filedf)  

#Ttraining and testing dataset
X = df.drop("Category",axis=1)
y = df.Category
X_train, X_test, y_train, y_test = train_test_split(X,y, test_size=0.3,random_state=42)

model= IsolationForest(n_estimators=100, max_samples=len(X_train),random_state=0, verbose=0)   
model.fit(X_train,y_train)
ypred= model.predict(X_test)

ypred[ypred == 1] = 0 #normal
ypred[ypred == -1] = 1 #possibly fraud

def eval_metrics(actual, pred):
    # compute relevant metrics
    acc_score = accuracy_score(y_test,ypred)
    return acc_score

def load_data(filedf):
    df = pd.read_csv(filedf)  
    X = df.drop("Category",axis=1)
    y = df.Category
    X_train, X_test, y_train, y_test = train_test_split(X,y, test_size=0.3,random_state=42)
    return X_train, y_train, X_test, y_test

def train(n_estimators=40, max_samples=len(X_train)):
    # train a model with given parameters
    np.random.seed(42)

    # Read csv file 
    filedf = "fraud_detector.csv"
    train_x, train_y, test_x, test_y = load_data(filedf)

    # Useful for multiple runs     
    with mlflow.start_run():
        # Execute 
        model= IsolationForest(n_estimators=n_estimators, max_samples=max_samples,random_state=0, verbose=0)   
        model.fit(X_train,y_train)
        
        ypred[ypred == 1] = 0 #normal
        ypred[ypred == -1] = 1 #possibly fraud
        
        # Evaluate Metrics
        predicted_qualities = model.predict(X_test)
        (acc_score) = eval_metrics(y_test, predicted_qualities)

        # Print out metrics
        print("evaluate_metric (n_estimators=%f, max_samples=%f):" % (n_estimators, max_samples))
        print("  ACCURACY SCORE: %s" % acc_score)
       
        # Log parameter, metrics, and model to MLflow
        mlflow.log_param(key="n_estimators", value=n_estimators)
        mlflow.log_param(key="max_samples", value=max_samples)
        mlflow.log_metrics({"accuracy score":acc_score})
        mlflow.log_artifact(filedf)
        print("Save to: {}".format(mlflow.get_artifact_uri()))
        
        mlflow.sklearn.log_model(model, "model")

with mlflow.start_run():
     for epoch in range(0, 3):
        mlflow.log_metric(key="quality", value=2*epoch, step=epoch)        
        
train(120, 700)

evaluate_metric (n_estimators=120.000000, max_samples=700.000000):
  ACCURACY SCORE: 0.9833333333333333
Save to: azureml://experiments/eval_metric/runs/04709530-c2be-473d-9d95-0acd270c6cd0/artifacts


In [6]:
train(200, 600)

evaluate_metric (n_estimators=200.000000, max_samples=600.000000):
  ACCURACY SCORE: 0.9833333333333333
Save to: azureml://experiments/eval_metric/runs/32f78059-caba-45db-a885-7b11f3506623/artifacts


In [10]:
mlflow.end_run() #End previous Run
mlflow_run = mlflow.start_run() #start new Run

client = MlflowClient()
finished_mlflow_run = MlflowClient().get_run(mlflow_run.info.run_id)

exp = Experiment(workspace=ws, name=exp_nameB)
list_experiments = exp.list(ws)

list_runs = exp.get_runs()
for run in list_runs:
    print(run.id)

37b89e16-4cd3-435d-b823-d5eecd040820
bdb569bd-d092-481d-a34c-8d1b39eb6c9f
32f78059-caba-45db-a885-7b11f3506623
04709530-c2be-473d-9d95-0acd270c6cd0
8453c18a-f1fa-460c-880d-8ce1ae7ce07b


In [13]:
metrics = finished_mlflow_run.data.metrics
tags = finished_mlflow_run.data.tags
params = finished_mlflow_run.data.params
print(tags)

{'mlflow.user': 'HENNY', 'mlflow.source.name': 'C:\\ProgramData\\Anaconda3\\lib\\site-packages\\ipykernel_launcher.py', 'mlflow.source.type': 'LOCAL', 'mlflow.rootRunId': '37b89e16-4cd3-435d-b823-d5eecd040820'}


In [17]:
from azureml.core.model import Model
model = Model.register(model_path = "model.pkl",
                       model_name = "fraud_detect",
                       tags = {"key": "1"},
                       description = "fraud Prediction", 
                       workspace = ws,)

Registering model fraud_detect


In [20]:
#import model
model = Model(workspace=ws, name="fraud_detect")
model.download(target_dir=os.getcwd(), exist_ok=True)
#model.delete()

'C:\\Users\\HENNY\\Documents\\PYTHON\\mlflow_project\\model.pkl'

In [21]:
print(model.name, model.id, model.version, sep='\t')
azureml.core.compute.ComputeTarget
azureml.core.runconfig.RunConfiguration

fraud_detect	fraud_detect:1	1


azureml.core.runconfig.RunConfiguration

In [22]:
from azureml.core.runconfig import RunConfiguration
from azureml.core.compute import AmlCompute
list_vms = AmlCompute.supported_vmsizes(workspace=ws)

compute_config = RunConfiguration()
#compute_config.target = "amlcompute"
#compute_config.amlcompute.vm_size = "STANDARD_D1_V2"

In [8]:
%%writefile score.py
import json
from azureml.core.model import Model

def init():
    global model
    # retrieve the path to the model file using the model name
    model_path = Model.get_model_path('fraud_detect')
    #model_path = "model.pkl"
    model = joblib.load(model_path)

def run(raw_data):
    data = np.array(json.loads(raw_data)['data'])
    # make prediction
    ypred = model.predict(data)
    return json.dumps(ypred.tolist())

Overwriting score.py


In [9]:
%%time
image_config = ContainerImage.image_configuration(execution_script="score.py", runtime="python")

Wall time: 0 ns


### Inference Config

In [27]:
env=ws.environments['AzureML-mlflow-ubuntu18.04-py37-cpu-inference']

dummy_inference_config = InferenceConfig(environment=env, source_directory='.', entry_script="score.py")
print(dummy_inference_config)
print(InferenceConfig)
print(image_config)



InferenceConfig(entry_script=score.py, runtime=None, conda_file=None, extra_docker_file_steps=None, source_directory=C:\Users\HENNY\Documents\PYTHON\mlflow_project, enable_gpu=None, base_image=None, base_image_registry=<azureml.core.container_registry.ContainerRegistry object at 0x000001C23506C2B0>)
<class 'azureml.core.model.InferenceConfig'>
<azureml.core.image.container.ContainerImageConfig object at 0x000001C235147490>


In [32]:
from azureml.core.webservice import AciWebservice, Webservice
# Create a deployment config
aci_config = AciWebservice.deploy_configuration(cpu_cores=1, memory_gb=1, auth_enabled=True)
aci_config

<azureml.core.webservice.aci.AciServiceDeploymentConfiguration at 0x1c235ad2580>

In [10]:
# create environment for the deploy
from azureml.core.environment import Environment
from azureml.core.conda_dependencies import CondaDependencies
from azureml.core.webservice import AciWebservice

# get a curated environment
#env=ws.environments['AzureML-mlflow-ubuntu18.04-py37-cpu-inference']

env = Environment.get(workspace=ws, name="AzureML-mlflow-ubuntu18.04-py37-cpu-inference",version=1)
env.inferencing_stack_version='latest'

# create deployment config i.e. compute resources
aciconfig = AciWebservice.deploy_configuration(cpu_cores=1,memory_gb=1,
                        tags={"data": "MNIST", "method": "sklearn"},description="Predict fraud")

dummy_inference_config = InferenceConfig(environment=env, source_directory='.', entry_script="score.py")
print(dummy_inference_config)
print(InferenceConfig)
print(image_config)

InferenceConfig(entry_script=score.py, runtime=None, conda_file=None, extra_docker_file_steps=None, source_directory=C:\Users\HENNY\Documents\PYTHON\mlflow_project, enable_gpu=None, base_image=None, base_image_registry=<azureml.core.container_registry.ContainerRegistry object at 0x00000219AF2FA190>)
<class 'azureml.core.model.InferenceConfig'>
<azureml.core.image.container.ContainerImageConfig object at 0x00000219AF2FAFA0>


In [11]:
src = ScriptRunConfig(source_directory='.', script='score.py', environment=env)
print(src)

<azureml.core.script_run_config.ScriptRunConfig object at 0x00000219AE9A7370>


## Custom environment yaml.file

In [5]:
from azureml.core import Environment
from azureml.core.environment import Environment
from azureml.core.conda_dependencies import CondaDependencies
from azureml.core.model import InferenceConfig

myenv = CondaDependencies()
myenv.add_conda_package('')

myenv.add_pip_package('pip==21.0.1')
myenv.add_pip_package('azureml.core')
myenv.add_conda_package("scikit-learn")
myenv.add_conda_package("mlflow")
myenv.add_conda_package('numpy')
myenv.add_conda_package("pandas")
myenv.add_conda_package("joblib")
myenv.add_conda_package("python==3.6.2")
myenv.add_conda_package("flask == 2.0.1")

with open("myenv.Yaml","w") as f:
    f.write(myenv.serialize_to_string())
with open("myenv.Yaml","r") as f:
    print(f.read())
    
myenv = Environment.from_conda_specification(name='myenv', file_path='myenv.yaml')    

# Conda environment specification. The dependencies defined in this file will

# be automatically provisioned for runs with userManagedDependencies=False.


# Details about the Conda environment file format:

# https://conda.io/docs/user-guide/tasks/manage-environments.html#create-env-file-manually


name: project_environment
dependencies:
  # The python interpreter version.

  # Currently Azure ML only supports 3.5.2 and later.

- python=3.6.2

- pip:
    # Required packages for AzureML execution, history, and data preparation.

  - azureml-defaults

  - pip==21.0.1
  - azureml.core
- ''
- scikit-learn
- mlflow
- numpy
- pandas
- joblib
- flask == 2.0.1
channels:
- anaconda
- conda-forge

