### Setup MLRun Project

In [1]:
import os
from os import path
from mlrun import set_environment, new_project, mlconf

# Configure the default MLRun environment for "gitops-project". The artifact
# path is a template containing {{run.user}} and {{workflow.uid}} placeholders.
# Both values handed back by set_environment are reused by later cells.
project_name, artifact_path = set_environment(
    project="gitops-project",
    artifact_path="v3io:///users/{{run.user}}/pipe/{{workflow.uid}}",
)

# Create the project, using the local "project" directory (resolved to an
# absolute path) as its context.
project_path = os.path.abspath("project")
project = new_project(context=project_path, name=project_name)

### Build Docker Image

In [2]:
# Image reference used by every function in this notebook. The registry host is
# derived from the IGZ_NAMESPACE_DOMAIN environment variable.
# NOTE(review): os.getenv returns None when the variable is unset, which would
# silently produce "docker-registry.None:80/gitops-image" - confirm it is set.
image = "docker-registry.{}:80/gitops-image".format(os.getenv("IGZ_NAMESPACE_DOMAIN"))
image

'docker-registry.default-tenant.app.us-sales-eks.iguazio-cd0.com:80/gitops-image'

In [3]:
# # Build Docker Image (only needs to be run once)
# from mlrun import new_function

# build_image = new_function(name="build-image", kind="job")
# build_image.build_config(
#     image=image,
#     base_image="mlrun/mlrun",
#     commands=["pip install PyGithub"]
# )
# build_image.deploy(with_mlrun=False)

In [4]:
# Display the image reference again; the functions registered in the next cell
# are all given this value as their `image`.
image

'docker-registry.default-tenant.app.us-sales-eks.iguazio-cd0.com:80/gitops-image'

### Import Functions

In [5]:
# Register every function used by the train/deploy workflows on the project.
# All of them share the same container image, so the settings that differ are
# kept in one table and registered in a loop rather than seven copy-pasted
# calls. Registration order is the same as before.
function_specs = [
    # (function name, source file or hub URL, runtime kind)
    ("gen-iris", "components/gen_iris.py", "job"),
    ("describe", "hub://describe", "job"),
    ("train", "components/sklearn_classifier.py", "job"),
    ("test", "components/test_classifier.py", "job"),
    ("serving", "hub://v2_model_server", "serving"),
    ("live-tester", "components/model_server_tester.py", "nuclio"),
    ("drift-watcher", "components/drift_watcher.py", "nuclio"),
]

# A loop (instead of a trailing call expression) also keeps the cell from
# echoing the repr of the last registered function.
for fn_name, fn_source, fn_kind in function_specs:
    project.set_function(name=fn_name, func=fn_source, kind=fn_kind, image=image)

<mlrun.runtimes.function.RemoteRuntime at 0x7faab78f2090>

### Training Pipeline

In [6]:
%%writefile {path.join(project_path, 'pipelines/train.py')}
from kfp import dsl
from mlrun import mount_v3io, NewTask

# Module-level slots read by kfpipeline below: `funcs` is indexed by function
# name and `this_project` is queried for project params.
# NOTE(review): both start out empty here - presumably MLRun fills them in when
# it loads this workflow file; confirm against the MLRun version in use.
funcs = {}
this_project = None
# Key of the dataset output produced by the ingestion step.
DATASET = 'iris_dataset'
# Label column name passed to the describe/train/test steps.
LABELS  = "label"

# init functions is used to configure function resources and local settings
def init_functions(functions: dict, project=None, secrets=None):
    """Apply settings shared by every function of the training workflow.

    Each function gets the v3io mount applied and, when a GitHub token is
    available, a ``GITHUB_TOKEN`` environment variable.

    :param functions: mapping of function name -> function object; every value
                      must support ``apply`` and ``set_env``.
    :param project:   unused here; kept so the signature matches the call made
                      with (functions, project, secrets).
    :param secrets:   object exposing ``get(key)``; may be None, in which case
                      no token is set.
    """
    # `secrets` defaults to None, so look the token up only when a store was
    # actually supplied (previously this raised AttributeError on None).
    github_token = secrets.get("GITHUB_TOKEN") if secrets is not None else None

    for f in functions.values():
        f.apply(mount_v3io())
        # Skip the env var entirely rather than exporting a missing token.
        if github_token is not None:
            f.set_env("GITHUB_TOKEN", github_token)

# Training workflow: generate the iris dataset, summarise it, train over three
# candidate model classes, then test the resulting model against an existing one.
# (Plain comments are used instead of a docstring so the pipeline function's
# __doc__ stays unset, exactly as before.)
@dsl.pipeline(
    name="Demo training pipeline",
    description="Shows how to use mlrun."
)
def kfpipeline(
    existing_model_path: str = "None"
):
    # Step 1 - ingestion: run the generator handler and expose the dataset output.
    ingest_step = funcs["gen-iris"].as_step(
        name="get-data",
        handler="iris_generator",
        params={"format": "pq"},
        outputs=[DATASET],
    )
    dataset_ref = ingest_step.outputs[DATASET]

    # Step 2 - summarise the ingested table.
    summary_step = funcs["describe"].as_step(
        name="summary",
        params={"label_column": LABELS},
        inputs={"table": dataset_ref},
    )

    # Step 3 - train with hyper-parameters: one iteration per model class,
    # using the 'max.accuracy' selector; the run is labelled with the project's
    # 'commit' param (empty string when absent).
    candidate_models = [
        "sklearn.ensemble.RandomForestClassifier",
        "sklearn.linear_model.LogisticRegression",
        "sklearn.ensemble.AdaBoostClassifier",
    ]
    train_step = funcs["train"].as_step(
        name="train",
        handler="train_model",
        params={"sample": -1, "label_column": LABELS, "test_size": 0.10},
        hyperparams={"model_pkg_class": candidate_models},
        selector="max.accuracy",
        inputs={"dataset": dataset_ref},
        labels={"commit": this_project.params.get("commit", "")},
        outputs=["model", "test_set"],
    )

    # Step 4 - test the new model on the held-out set and compare it with the
    # existing model on accuracy.
    test_step = funcs["test"].as_step(
        name="test",
        handler="test_classifier",
        params={
            "label_column": LABELS,
            "new_model_path": train_step.outputs["model"],
            "existing_model_path": existing_model_path,
            "comparison_metric": "accuracy",
            "post_github": True,
        },
        inputs={"test_set": train_step.outputs["test_set"]},
    )

Overwriting /User/mlrun-github-actions-demo/project/pipelines/train.py


### Deployment Pipeline

In [7]:
%%writefile {path.join(project_path, 'pipelines/deploy.py')}
from kfp import dsl
from mlrun import mount_v3io, NewTask
import nuclio

# Module-level slots read by kfpipeline below: `funcs` is indexed by function
# name and `this_project` is queried for project params.
# NOTE(review): both start out empty here - presumably MLRun fills them in when
# it loads this workflow file; confirm against the MLRun version in use.
funcs = {}
this_project = None
# NOTE(review): DATASET and LABELS are not referenced by the code visible in
# this workflow file - possibly left over from the training workflow.
DATASET = 'iris_dataset'
LABELS  = "label"

# init functions is used to configure function resources and local settings
def init_functions(functions: dict, project=None, secrets=None):
    """Apply shared settings and monitoring triggers for the deploy workflow.

    Every function gets the v3io mount applied and, when a GitHub token is
    available, a ``GITHUB_TOKEN`` environment variable. The "serving",
    "live-tester" and "drift-watcher" entries are then configured individually.

    :param functions: mapping of function name -> function object; must contain
                      the keys "serving", "live-tester" and "drift-watcher".
    :param project:   project object; ``project.metadata.name`` is read to build
                      the stream path, so in practice it must not be None.
    :param secrets:   object exposing ``get(key)``; may be None, in which case
                      no token is set.
    """
    # `secrets` defaults to None, so look the token up only when a store was
    # actually supplied (previously this raised AttributeError on None).
    github_token = secrets.get("GITHUB_TOKEN") if secrets is not None else None

    for f in functions.values():
        f.apply(mount_v3io())
        # Skip the env var entirely rather than exporting a missing token.
        if github_token is not None:
            f.set_env("GITHUB_TOKEN", github_token)

    # Enable model monitoring
    functions["serving"].set_tracking()
    # Invoke the live tester on a 1-second cron interval.
    functions["live-tester"].add_trigger('cron', nuclio.triggers.CronTrigger(interval="1s"))
    # Feed the drift watcher from the project's model-endpoints log stream,
    # seeking to "latest".
    functions["drift-watcher"].add_v3io_stream_trigger(name="stream",
                                                       stream_path=f"projects/{project.metadata.name}/model-endpoints/log_stream",
                                                       seek_to="latest")
    
# Deployment workflow: deploy the serving function with the given model, then
# the live tester pointed at its endpoint, then the drift watcher.
# The pipeline name previously read "Demo training pipeline" (copied from the
# training workflow), which made the two pipelines indistinguishable by name.
# (Plain comments are used instead of a docstring so the pipeline function's
# __doc__ stays unset, exactly as before.)
@dsl.pipeline(
    name="Demo deployment pipeline",
    description="Shows how to use mlrun."
)
def kfpipeline(
    model_path:str
):

    # deploy our model as a serverless function; the function tag is the
    # project's 'commit' param, falling back to 'v1'
    deploy = funcs["serving"].deploy_step(models={"model": model_path},
                                          tag=this_project.params.get('commit', 'v1'))

    # test out new model server (via REST API calls); it receives the serving
    # endpoint and the model path through its environment
    tester = funcs["live-tester"].deploy_step(env={"addr" : deploy.outputs["endpoint"],"model_path" : model_path})

    # drift watcher to post on github; explicitly ordered after the serving deploy
    watcher = funcs["drift-watcher"].deploy_step().after(deploy)

Overwriting /User/mlrun-github-actions-demo/project/pipelines/deploy.py


### Add GitHub secret

In [8]:
# Attach secrets to the project from the local file "ghtoken.txt" (source kind "file").
# NOTE(review): presumably this file provides the GITHUB_TOKEN that the
# workflows' init_functions read - confirm, and keep the file out of version control.
project.with_secrets("file", "ghtoken.txt")

<mlrun.projects.project.MlrunProject at 0x7faac1499510>

### Save Pipeline

In [9]:
# Attach both workflow files (written by the %%writefile cells above) to the
# project under their short names, then persist the project.
for workflow_name in ("train", "deploy"):
    project.set_workflow(workflow_name, f"pipelines/{workflow_name}.py")
project.save()

### Run Train Pipeline

In [10]:
# Run the "train" workflow with watch=True and keep what project.run returns.
# NOTE(review): existing_model_path is a hard-coded store URI ending in a
# specific uid - it will need updating on a cluster that lacks that model.
run_id = project.run(
    "train",
    arguments={
        "existing_model_path": "store://models/gitops-project/train_model#2@3f49ccb6-895c-4e0d-848a-79a81108e248",
    },
    artifact_path=artifact_path,
    dirty=True,
    watch=True,
)

> 2021-07-31 21:09:44,181 [info] using in-cluster config.


> 2021-07-31 21:09:44,647 [info] Pipeline run id=8c199527-740f-47ce-a2c2-91aedff255ba, check UI or DB for progress
> 2021-07-31 21:09:44,647 [info] waiting for pipeline run completion


uid,start,state,name,results,artifacts
...87776986,Jul 31 21:10:34,completed,test,accuracy-new_model=0.9333333333333333test-error-new_model=0.06666666666666667f1-new_model=0.9137254901960784precision-new_model=0.8888888888888888recall-new_model=0.9629629629629629accuracy-existing_model=0.9333333333333333test-error-existing_model=0.06666666666666667f1-existing_model=0.9137254901960784precision-existing_model=0.8888888888888888recall-existing_model=0.9629629629629629,test_set_preds-new_modeltest_set_preds-existing_model
...815c4546,Jul 31 21:10:08,completed,summary,,histogramsviolinimbalanceimbalance-weights-veccorrelation-matrixcorrelation
...50800280,Jul 31 21:10:06,completed,train,best_iteration=2accuracy=0.975609756097561test-error=0.024390243902439025auc-micro=0.9973230220107079auc-weighted=0.9966358284272497f1-score=0.9721739130434783precision_score=0.9743589743589745recall_score=0.9722222222222222,test_setconfusion-matrixprecision-recall-multiclassroc-multiclassmodeliteration_results
...e48c0212,Jul 31 21:09:53,completed,get-data,,iris_dataset


### Run Deploy Pipeline

In [11]:
# Run the "deploy" workflow with watch=True and keep what project.run returns.
# NOTE(review): the uid after "@" in model_path is hard-coded; it equals the
# pipeline run id logged by the train run in this notebook's saved output, so
# it must be updated by hand after every new training run.
run_id = project.run(
    "deploy",
    arguments={
        "model_path": "store://models/gitops-project/train_model#2@8c199527-740f-47ce-a2c2-91aedff255ba",
    },
    artifact_path=artifact_path,
    dirty=True,
    watch=True,
)

> 2021-07-31 21:12:38,816 [info] Pipeline run id=bda850fd-790e-4fe5-81d8-fea16a34ea45, check UI or DB for progress
> 2021-07-31 21:12:38,816 [info] waiting for pipeline run completion


In [427]:
# Issue an HTTP GET against a hard-coded host and port.
# NOTE(review): presumably this is the address of the deployed serving function
# on one specific cluster - confirm and update before re-running elsewhere.
!curl http://default-tenant.app.us-sales-eks.iguazio-cd0.com:32344/