### Setup MLRun Project

In [39]:
import os
from os import path
from mlrun import set_environment, new_project, mlconf

# Resolve the local project directory (absolute path of ./project)
project_path = path.abspath("project")

# Apply the default MLRun environment settings; the artifact path is a
# template that carries {{run.user}} and {{workflow.uid}} placeholders
project_name, artifact_path = set_environment(
    project="gitops-project",
    artifact_path="v3io:///users/{{run.user}}/pipe/{{workflow.uid}}",
)

# Create the project object, using the directory above as its context
project = new_project(name=project_name, context=project_path)

### Build Docker Image

In [40]:
# Compose the image reference from the IGZ_NAMESPACE_DOMAIN environment variable
registry_domain = os.getenv('IGZ_NAMESPACE_DOMAIN')
image = f"docker-registry.{registry_domain}:80/gitops-image"
image

'docker-registry.default-tenant.app.us-sales-eks.iguazio-cd0.com:80/gitops-image'

In [41]:
# # Build Docker Image (only needs to be run once)
# from mlrun import new_function

# build_image = new_function(name="build-image", kind="job")
# build_image.build_config(
#     image=image,
#     base_image="mlrun/mlrun",
#     commands=["pip install PyGithub"]
# )
# build_image.deploy(with_mlrun=False)

In [42]:
# Echo the image reference again; the functions registered below all use it
image

'docker-registry.default-tenant.app.us-sales-eks.iguazio-cd0.com:80/gitops-image'

### Import Functions

In [43]:
# (name, source, runtime kind) for each function registered on the project;
# every one of them uses the same image
function_specs = [
    ("gen-iris",      "components/gen_iris.py",            "job"),
    ("describe",      "hub://describe",                    "job"),
    ("train",         "components/sklearn_classifier.py",  "job"),
    ("test",          "components/test_classifier.py",     "job"),
    ("serving",       "hub://v2_model_server",             "serving"),
    ("live-tester",   "components/model_server_tester.py", "nuclio"),
    ("drift-watcher", "components/drift_watcher.py",       "nuclio"),
]

for fn_name, fn_source, fn_kind in function_specs:
    last_registered = project.set_function(name=fn_name,
                                           func=fn_source,
                                           kind=fn_kind,
                                           image=image)

# Display the object returned by the final set_function call
last_registered

<mlrun.runtimes.function.RemoteRuntime at 0x7f68d01cf3d0>

### Training Pipeline

In [44]:
%%writefile {path.join(project_path, 'pipelines/train.py')}
from kfp import dsl
from mlrun import mount_v3io, NewTask

# Placeholders read by kfpipeline below (funcs[...] and this_project.params).
# NOTE(review): presumably MLRun replaces them with the project's functions and
# project object before the pipeline is built — confirm.
funcs = {}
this_project = None
# Output key of the "get-data" step, also used to wire it into later steps
DATASET = 'iris_dataset'
# Value passed as "label_column" to the summary, train and test steps
LABELS  = "label"

# init functions is used to configure function resources and local settings
def init_functions(functions: dict, project=None, secrets=None):
    """Apply shared runtime settings to every function in the workflow.

    Each function gets the v3io mount applied. When a GitHub token is
    available in ``secrets`` it is exposed to the function as the
    ``GITHUB_TOKEN`` environment variable.

    :param functions: mapping of function name to function object
    :param project:   not used by this implementation
    :param secrets:   object exposing ``get(key)``; may be ``None``, in which
                      case no token is set
    """
    # `secrets` defaults to None, so guard the lookup instead of crashing on
    # `None.get(...)`; compare with `is not None` because a secrets object
    # is not guaranteed to define truthiness.
    github_token = secrets.get("MY_GITHUB_TOKEN") if secrets is not None else None

    for f in functions.values():
        f.apply(mount_v3io())
        # Skip the env var entirely when there is no token, rather than
        # handing a None value to set_env.
        if github_token is not None:
            f.set_env("GITHUB_TOKEN", github_token)

@dsl.pipeline(
    name="GitOps Training Pipeline",
    description="Train a model"
)
def kfpipeline(
    existing_model_path: str = "None",
    force_deploy: bool = False,
):
    # Step 1 - data ingestion: run the generator handler and expose the dataset output
    data_step = funcs['gen-iris'].as_step(
        name="get-data",
        handler='iris_generator',
        params={'format': 'pq'},
        outputs=[DATASET],
    )
    dataset = data_step.outputs[DATASET]

    # Step 2 - dataset summary, fed by the ingestion output
    summary_step = funcs["describe"].as_step(
        name="summary",
        params={"label_column": LABELS},
        inputs={"table": dataset},
    )

    # Step 3 - training with hyper-parameters: one iteration per candidate
    # model class, keeping the one selected by 'max.accuracy'
    candidate_models = [
        "sklearn.ensemble.RandomForestClassifier",
        "sklearn.linear_model.LogisticRegression",
        "sklearn.ensemble.AdaBoostClassifier",
    ]
    train_params = {
        "sample": -1,
        "label_column": LABELS,
        "test_size": 0.10,
    }
    train_step = funcs["train"].as_step(
        name="train",
        handler="train_model",
        params=train_params,
        hyperparams={'model_pkg_class': candidate_models},
        selector='max.accuracy',
        inputs={"dataset": dataset},
        labels={"commit": this_project.params.get('commit', '')},
        outputs=['model', 'test_set'],
    )

    # Step 4 - test the newly trained model; the existing model path and the
    # force_deploy flag are forwarded to the test handler as parameters
    test_params = {
        "label_column": LABELS,
        "new_model_path": train_step.outputs['model'],
        "existing_model_path": existing_model_path,
        "comparison_metric": "accuracy",
        "post_github": True,
        "force_deploy": force_deploy,
    }
    test_step = funcs["test"].as_step(
        name="test",
        handler="test_classifier",
        params=test_params,
        inputs={"test_set": train_step.outputs['test_set']},
    )

Overwriting /User/mlrun-github-actions-demo/project/pipelines/train.py


### Deployment Pipeline

In [45]:
%%writefile {path.join(project_path, 'pipelines/deploy.py')}
from kfp import dsl
from mlrun import mount_v3io, NewTask
import nuclio

# Placeholders read by kfpipeline below (funcs[...] and this_project.params).
# NOTE(review): presumably MLRun replaces them with the project's functions and
# project object before the pipeline is built — confirm.
funcs = {}
this_project = None
# NOTE(review): DATASET and LABELS are not referenced by the code visible in
# this file — confirm whether they are still needed here.
DATASET = 'iris_dataset'
LABELS  = "label"

# init functions is used to configure function resources and local settings
def init_functions(functions: dict, project=None, secrets=None):
    """Apply shared runtime settings and wire up the monitoring triggers.

    Each function gets the v3io mount applied. When a GitHub token is
    available in ``secrets`` it is exposed to the function as the
    ``GITHUB_TOKEN`` environment variable. The "serving", "live-tester" and
    "drift-watcher" entries are then configured for model monitoring.

    :param functions: mapping of function name to function object; must
                      contain "serving", "live-tester" and "drift-watcher"
    :param project:   project object; ``project.metadata.name`` is used to
                      build the stream path, so it is required in practice
    :param secrets:   object exposing ``get(key)``; may be ``None``, in which
                      case no token is set
    """
    # `secrets` defaults to None, so guard the lookup instead of crashing on
    # `None.get(...)`; compare with `is not None` because a secrets object
    # is not guaranteed to define truthiness.
    github_token = secrets.get("MY_GITHUB_TOKEN") if secrets is not None else None

    for f in functions.values():
        f.apply(mount_v3io())
        # Skip the env var entirely when there is no token, rather than
        # handing a None value to set_env.
        if github_token is not None:
            f.set_env("GITHUB_TOKEN", github_token)

    # Enable model monitoring
    functions["serving"].set_tracking()
    # The live tester is invoked by a cron trigger with a 1s interval
    functions["live-tester"].add_trigger('cron', nuclio.triggers.CronTrigger(interval="1s"))
    # The drift watcher reads the project's model-endpoints log stream, starting from the latest record
    functions["drift-watcher"].add_v3io_stream_trigger(name="stream",
                                                       stream_path=f"projects/{project.metadata.name}/model-endpoints/log_stream",
                                                       seek_to="latest")
    
@dsl.pipeline(
    name="GitOps Deployment Pipeline",
    description="Deploy a model."
)
def kfpipeline(
    model_path: str = "None"
):
    # Deploy the serving function with the given model registered under the
    # key "model"; the tag comes from the project's 'commit' param (default 'v1')
    serving_tag = this_project.params.get('commit', 'v1')
    serving_step = funcs["serving"].deploy_step(
        models={"model": model_path},
        tag=serving_tag,
    )

    # Deploy the live tester, pointing it at the serving endpoint and the model path
    tester_env = {
        "addr": serving_step.outputs["endpoint"],
        "model_path": model_path,
    }
    tester_step = funcs["live-tester"].deploy_step(env=tester_env)

    # Deploy the drift watcher, ordered after the serving deployment
    watcher_step = funcs["drift-watcher"].deploy_step()
    watcher_step.after(serving_step)

Overwriting /User/mlrun-github-actions-demo/project/pipelines/deploy.py


### Add GitHub secret

In [46]:
# Register "ghtoken.txt" as a file-based secrets source on the project; the
# pipelines' init_functions read the "MY_GITHUB_TOKEN" key from these secrets.
# NOTE(review): presumably the file holds a MY_GITHUB_TOKEN=<token> entry and
# must be kept out of version control — confirm.
project.with_secrets("file", "ghtoken.txt")

<mlrun.projects.project.MlrunProject at 0x7f68d01d8410>

### Save Pipeline

In [47]:
# Register each workflow under its name (pipelines/<name>.py), then save the project
for workflow_name in ("train", "deploy"):
    project.set_workflow(workflow_name, f"pipelines/{workflow_name}.py")
project.save()

### Run Train Pipeline

In [23]:
# Launch the "train" workflow with no argument overrides and watch it until completion
run_id = project.run(
    "train",
    arguments={},
    artifact_path=artifact_path,
    dirty=True,
    watch=True,
)

> 2021-08-03 00:23:40,913 [info] Pipeline run id=9899909d-9153-47e2-9ba0-6a32ca3d86bc, check UI or DB for progress
> 2021-08-03 00:23:40,914 [info] waiting for pipeline run completion


uid,start,state,name,results,artifacts
...3ad5d114,Aug 03 00:24:31,completed,test,accuracy-new_model=0.9333333333333333test-error-new_model=0.06666666666666667f1-new_model=0.9137254901960784precision-new_model=0.8888888888888888recall-new_model=0.9629629629629629,test_set_preds-new_model
...542d1cfd,Aug 03 00:24:03,completed,train,best_iteration=1accuracy=0.975609756097561test-error=0.024390243902439025auc-micro=0.9979179060083284auc-weighted=0.9966358284272497f1-score=0.9721739130434783precision_score=0.9743589743589745recall_score=0.9722222222222222,test_setconfusion-matrixfeature-importancesprecision-recall-multiclassroc-multiclassmodeliteration_results
...225eb7c0,Aug 03 00:24:04,completed,summary,,histogramsviolinimbalanceimbalance-weights-veccorrelation-matrixcorrelation
...a61f0f79,Aug 03 00:23:50,completed,get-data,,iris_dataset


In [22]:
# Launch the "train" workflow again, this time passing an existing model URI
# through to the pipeline's existing_model_path argument
existing_model_uri = "store://models/gitops-project/train_model#2@3f49ccb6-895c-4e0d-848a-79a81108e248"
run_id = project.run(
    "train",
    arguments={"existing_model_path": existing_model_uri},
    artifact_path=artifact_path,
    dirty=True,
    watch=True,
)

> 2021-08-03 00:22:11,758 [info] Pipeline run id=33f333dc-0ca3-40c7-b535-bb0cf36d0e57, check UI or DB for progress
> 2021-08-03 00:22:11,759 [info] waiting for pipeline run completion


uid,start,state,name,results,artifacts
...f20a1fe0,Aug 03 00:23:03,completed,test,accuracy-new_model=0.9333333333333333test-error-new_model=0.06666666666666667f1-new_model=0.9137254901960784precision-new_model=0.8888888888888888recall-new_model=0.9629629629629629accuracy-existing_model=0.9333333333333333test-error-existing_model=0.06666666666666667f1-existing_model=0.9137254901960784precision-existing_model=0.8888888888888888recall-existing_model=0.9629629629629629,test_set_preds-new_modeltest_set_preds-existing_model
...c0b63f94,Aug 03 00:22:35,completed,summary,,histogramsviolinimbalanceimbalance-weights-veccorrelation-matrixcorrelation
...d7d07258,Aug 03 00:22:33,completed,train,best_iteration=2accuracy=0.975609756097561test-error=0.024390243902439025auc-micro=0.9973230220107079auc-weighted=0.9966358284272497f1-score=0.9721739130434783precision_score=0.9743589743589745recall_score=0.9722222222222222,test_setconfusion-matrixprecision-recall-multiclassroc-multiclassmodeliteration_results
...8f9303ec,Aug 03 00:22:20,completed,get-data,,iris_dataset


### Run Deploy Pipeline

In [11]:
# Launch the "deploy" workflow for a specific stored model and watch it until completion
deploy_model_uri = "store://models/gitops-project/train_model#2@8c199527-740f-47ce-a2c2-91aedff255ba"
run_id = project.run(
    "deploy",
    arguments={"model_path": deploy_model_uri},
    artifact_path=artifact_path,
    dirty=True,
    watch=True,
)

> 2021-07-31 21:12:38,816 [info] Pipeline run id=bda850fd-790e-4fe5-81d8-fea16a34ea45, check UI or DB for progress
> 2021-07-31 21:12:38,816 [info] waiting for pipeline run completion


In [427]:
# Request the root URL of a deployed endpoint; the host and port are hardcoded.
# NOTE(review): presumably this is the serving function deployed by the
# "deploy" pipeline — confirm and update the address for your cluster.
!curl http://default-tenant.app.us-sales-eks.iguazio-cd0.com:32344/