### Setup MLRun Project

In [308]:
import os
from os import path
from mlrun import set_environment, new_project, mlconf

# Select the "gitops-project" project and an artifact path templated on the
# running user and the workflow uid; keep the two values the call returns.
project_name, artifact_path = set_environment(
    project="gitops-project",
    artifact_path='v3io:///users/{{run.user}}/pipe/{{workflow.uid}}',
)

# Create the project, using the local "project" directory as its context
project_path = os.path.abspath("project")
project = new_project(name=project_name, context=project_path)

### Build Docker Image

In [309]:
# Image reference: <registry host derived from IGZ_NAMESPACE_DOMAIN>:80/gitops-image
namespace_domain = os.getenv('IGZ_NAMESPACE_DOMAIN')
image = f"docker-registry.{namespace_domain}:80/gitops-image"
image

'docker-registry.default-tenant.app.us-sales-eks.iguazio-cd0.com:80/gitops-image'

In [310]:
# # Build Docker Image (only needs to be run once)
# from mlrun import new_function

# build_image = new_function(name="build-image", kind="job")
# build_image.build_config(
#     image=image,
#     base_image="mlrun/mlrun",
#     commands=["pip install PyGithub"]
# )
# build_image.deploy(with_mlrun=False)

In [311]:
image

'docker-registry.default-tenant.app.us-sales-eks.iguazio-cd0.com:80/gitops-image'

### Import Functions

In [411]:
# (name, source, runtime kind) of every function referenced by the pipelines.
# All of them are registered with the same container image.
function_specs = [
    ("gen-iris", "components/gen_iris.py", "job"),
    ("describe", "hub://describe", "job"),
    ("train", "components/sklearn_classifier.py", "job"),
    ("test", "components/test_classifier.py", "job"),
    ("serving", "hub://v2_model_server", "serving"),
    ("live_tester", "components/model_server_tester.py", "nuclio"),
]

for fn_name, fn_source, fn_kind in function_specs:
    last_registered = project.set_function(name=fn_name,
                                           func=fn_source,
                                           kind=fn_kind,
                                           image=image)

# Keep the cell output: the object returned by the final set_function call
last_registered

<mlrun.runtimes.function.RemoteRuntime at 0x7f0c7493c690>

### Training Pipeline

In [412]:
%%writefile {path.join(project_path, 'pipelines/train.py')}
from kfp import dsl
from mlrun import mount_v3io, NewTask

# Module-level names read by the pipeline code in this file. `funcs` and
# `this_project` start out empty here; presumably MLRun populates them when it
# loads the workflow -- TODO confirm against the MLRun project docs.
funcs = dict()
this_project = None

# Output key of the generated dataset, and the name of the label column
DATASET = "iris_dataset"
LABELS = "label"

# init functions is used to configure function resources and local settings
def init_functions(functions: dict, project=None, secrets=None):
    """Apply the v3io mount and the GitHub token to every given function.

    Args:
        functions: mapping whose values are function objects; each value must
            provide ``apply`` and ``set_env``. The keys are not used here.
        project: accepted but not used by this function.
        secrets: object with a ``get`` method, used to look up
            ``GITHUB_TOKEN``. May be ``None`` (the default), in which case no
            token is set on the functions.
    """
    # `secrets` defaults to None, so guard the lookup instead of failing with
    # AttributeError on `None.get(...)`.
    github_token = secrets.get("GITHUB_TOKEN") if secrets is not None else None

    for f in functions.values():
        f.apply(mount_v3io())
        # Leave the variable unset rather than exporting a None value
        if github_token is not None:
            f.set_env("GITHUB_TOKEN", github_token)

@dsl.pipeline(
    name="Demo training pipeline",
    description="Shows how to use mlrun."
)
def kfpipeline(
    existing_model_path:str="None"
):
    """Training workflow: generate data, summarize it, train models, test.

    Args:
        existing_model_path: forwarded as-is to the test step's
            ``existing_model_path`` parameter; defaults to the string "None".
    """
    # Step 1: run the ingestion function to produce the dataset
    get_data_step = funcs['gen-iris'].as_step(
        name="get-data",
        handler='iris_generator',
        params={'format': 'pq'},
        outputs=[DATASET],
    )
    dataset = get_data_step.outputs[DATASET]

    # Step 2: analyze the dataset
    summary_step = funcs["describe"].as_step(
        name="summary",
        params={"label_column": LABELS},
        inputs={"table": dataset},
    )

    # Step 3: train with hyper-parameters -- one candidate model class per
    # entry, with the 'max.accuracy' selector applied to the results
    candidate_models = [
        "sklearn.ensemble.RandomForestClassifier",
        "sklearn.linear_model.LogisticRegression",
        "sklearn.ensemble.AdaBoostClassifier",
    ]
    train_step = funcs["train"].as_step(
        name="train",
        handler="train_model",
        params={
            "sample": -1,
            "label_column": LABELS,
            "test_size": 0.10,
        },
        hyperparams={'model_pkg_class': candidate_models},
        selector='max.accuracy',
        inputs={"dataset": dataset},
        labels={"commit": this_project.params.get('commit', '')},
        outputs=['model', 'test_set'],
    )

    # Step 4: test the newly trained model on the held-out test set
    test_step = funcs["test"].as_step(
        name="test",
        handler="test_classifier",
        params={
            "label_column": LABELS,
            "new_model_path": train_step.outputs['model'],
            "existing_model_path": existing_model_path,
            "comparison_metric": "accuracy",
            "post_github": True,
        },
        inputs={"test_set": train_step.outputs['test_set']},
    )

#     # deploy our model as a serverless function
#     deploy = funcs["serving"].deploy_step(models={f"{DATASET}_v1": train.outputs['model']},
#                                           tag=this_project.params.get('commit', 'v1'))

#     # test out new model server (via REST API calls)
#     tester = funcs["live_tester"].as_step(name='model-tester',
#         params={'addr': deploy.outputs['endpoint'], 'model': f"{DATASET}_v1"},
#         inputs={'table': train.outputs['test_set']})

Overwriting /User/mlrun-github-actions-demo/project/pipelines/train.py


### Deployment Pipeline

In [413]:
%%writefile {path.join(project_path, 'pipelines/deploy.py')}
from kfp import dsl
from mlrun import mount_v3io, NewTask
import nuclio

# Module-level names read by the pipeline code in this file. `funcs` and
# `this_project` start out empty here; presumably MLRun populates them when it
# loads the workflow -- TODO confirm against the MLRun project docs.
funcs = dict()
this_project = None

# Dataset key and label-column name constants
DATASET = "iris_dataset"
LABELS = "label"

# init functions is used to configure function resources and local settings
def init_functions(functions: dict, project=None, secrets=None):
    """Configure all functions, then set up the serving and tester functions.

    Every function gets the v3io mount and, when a token is available, the
    ``GITHUB_TOKEN`` environment variable. Afterwards tracking is switched on
    for ``functions["serving"]`` and a cron trigger with a "1s" interval is
    added to ``functions["live_tester"]``.

    Args:
        functions: mapping of function name to function object; must contain
            the keys "serving" and "live_tester".
        project: accepted but not used by this function.
        secrets: object with a ``get`` method, used to look up
            ``GITHUB_TOKEN``. May be ``None`` (the default), in which case no
            token is set on the functions.

    Raises:
        KeyError: if "serving" or "live_tester" is missing from ``functions``.
    """
    # `secrets` defaults to None, so guard the lookup instead of failing with
    # AttributeError on `None.get(...)`.
    github_token = secrets.get("GITHUB_TOKEN") if secrets is not None else None

    for f in functions.values():
        f.apply(mount_v3io())
        # Leave the variable unset rather than exporting a None value
        if github_token is not None:
            f.set_env("GITHUB_TOKEN", github_token)

    # Enable model monitoring
    functions["serving"].set_tracking()
    functions["live_tester"].add_trigger('cron', nuclio.triggers.CronTrigger(interval="1s"))

@dsl.pipeline(
    name="Demo deployment pipeline",
    description="Shows how to use mlrun."
)
def kfpipeline(
    model_path:str
):
    """Deployment workflow: deploy the model server, then the live tester.

    Args:
        model_path: location of the model to serve. It is registered with the
            serving function under the key "model" and is also handed to the
            live tester through its ``model_path`` environment entry.
    """

    # deploy our model as a serverless function, tagged with the project's
    # 'commit' param (falling back to 'v1')
    deploy = funcs["serving"].deploy_step(models={"model": model_path},
                                          tag=this_project.params.get('commit', 'v1'))

    # deploy the live tester, pointing it at the model server's endpoint
    tester = funcs["live_tester"].deploy_step(env={"addr" : deploy.outputs["endpoint"],"model_path" : model_path})

Overwriting /User/mlrun-github-actions-demo/project/pipelines/deploy.py


### Add GitHub secret

In [414]:
# Register "ghtoken.txt" as a "file"-kind secrets source on the project.
# Presumably this file supplies the GITHUB_TOKEN value that the pipelines'
# init_functions read from `secrets` -- TODO confirm the expected file format.
# NOTE(review): keep the token file out of version control.
project.with_secrets("file", "ghtoken.txt")

<mlrun.projects.project.MlrunProject at 0x7f0c80a70250>

### Save Pipeline

In [415]:
# Attach both workflow files to the project under their names, then save it
for workflow_name in ("train", "deploy"):
    project.set_workflow(workflow_name, f"pipelines/{workflow_name}.py")
project.save()

### Run Train Pipeline

In [417]:
# run_id = project.run("train", arguments={"existing_model_path":"store://models/gitops-project/train_model#2@139f410a-70b0-489d-a748-593167757909"}, artifact_path=artifact_path, dirty=True, watch=True)

### Run Deploy Pipeline

In [404]:
# Launch the "deploy" workflow for one specific stored model and watch the run
deploy_arguments = {
    "model_path": "store://models/gitops-project/train_model#2@3f49ccb6-895c-4e0d-848a-79a81108e248",
}
run_id = project.run(
    "deploy",
    arguments=deploy_arguments,
    artifact_path=artifact_path,
    dirty=True,
    watch=True,
)

> 2021-07-31 00:27:36,102 [info] Pipeline run id=593db78a-bb27-4979-b4a1-2a947017c315, check UI or DB for progress
> 2021-07-31 00:27:36,103 [info] waiting for pipeline run completion


In [397]:
# Probe the HTTP endpoint (presumably the deployed function -- confirm the port).
# --max-time bounds the request so an unreachable endpoint cannot hang the cell
# until it is interrupted by hand.
!curl --max-time 10 http://default-tenant.app.us-sales-eks.iguazio-cd0.com:32344/

^C
