### Setup MLRun Project

In [171]:
import os
from os import path
from mlrun import set_environment, new_project, mlconf

# Set the default environment configuration
project_name, artifact_path = set_environment(project="gitops-project", artifact_path='v3io:///users/{{run.user}}/pipe/{{workflow.uid}}')

# Create project
project_path = path.abspath("project")
project = new_project(name=project_name, context=project_path)

### Build Docker Image

In [172]:
image = f"docker-registry.{os.getenv('IGZ_NAMESPACE_DOMAIN')}:80/gitops-image"
image

'docker-registry.default-tenant.app.us-sales-eks.iguazio-cd0.com:80/gitops-image'

In [173]:
# # Build Docker Image (only needs to be run once)
# from mlrun import new_function

# build_image = new_function(name="build-image", kind="job")
# build_image.build_config(
#     image=image,
#     base_image="mlrun/mlrun",
#     commands=["pip install PyGithub"]
# )
# build_image.deploy(with_mlrun=False)

In [174]:
image

'docker-registry.default-tenant.app.us-sales-eks.iguazio-cd0.com:80/gitops-image'

### Import Functions

In [300]:
project.set_function(name="gen-iris",
                    func="components/gen_iris.py",
                    kind="job",
                    image=image)
project.set_function(name="describe",
                    func="hub://describe",
                    kind="job",
                    image=image)
project.set_function(name="train",
                    func="hub://sklearn_classifier",
                    kind="job",
                    image=image)
project.set_function(name="test",
                    func="components/test_classifier.py",
                    kind="job",
                    image=image)
project.set_function(name="serving",
                    func="hub://model_server",
                    kind="serving",
                    image=image)
project.set_function(name="live_tester",
                    func="hub://model_server_tester",
                    kind="serving",
                    image=image)

<mlrun.runtimes.kubejob.KubejobRuntime at 0x7f0c75169190>

### Pipeline

In [301]:
%%writefile {path.join(project_path, 'pipelines/train.py')}
from kfp import dsl
from mlrun import mount_v3io, NewTask

funcs = {}
this_project = None
DATASET = 'iris_dataset'
LABELS  = "label"

# init functions is used to configure function resources and local settings
def init_functions(functions: dict, project=None, secrets=None):
    for f in functions.values():
        f.apply(mount_v3io())
        f.set_env("GITHUB_TOKEN", secrets.get("GITHUB_TOKEN"))

@dsl.pipeline(
    name="Demo training pipeline",
    description="Shows how to use mlrun."
)
def kfpipeline(
    existing_model_path:str="None"
):
    
    # run the ingestion function with the new image and params
    ingest = funcs['gen-iris'].as_step(
        name="get-data",
        handler='iris_generator',
        params={'format': 'pq'},
        outputs=[DATASET])

    # analyze our dataset
    describe = funcs["describe"].as_step(
        name="summary",
        params={"label_column": LABELS},
        inputs={"table": ingest.outputs[DATASET]})
    
    # train with hyper-paremeters
    train = funcs["train"].as_step(
        name="train",
        params={"sample"          : -1,
                "label_column"    : LABELS,
                "test_size"       : 0.10},
        hyperparams={'model_pkg_class': ["sklearn.ensemble.RandomForestClassifier",
                                         "sklearn.linear_model.LogisticRegression",
                                         "sklearn.ensemble.AdaBoostClassifier"]},
        selector='max.accuracy',
        inputs={"dataset"         : ingest.outputs[DATASET]},
        labels={"commit": this_project.params.get('commit', '')},
        outputs=['model', 'test_set'])

    # test and visualize our model
    test = funcs["test"].as_step(
        name="test",
        handler="test_classifier",
        params={"label_column": LABELS,
                "new_model_path" : train.outputs['model'],
                "existing_model_path" : existing_model_path,
                "comparison_metric": "accuracy",
                "post_github" : True},
        inputs={"test_set"    : train.outputs['test_set']})

#     # deploy our model as a serverless function
#     deploy = funcs["serving"].deploy_step(models={f"{DATASET}_v1": train.outputs['model']},
#                                           tag=this_project.params.get('commit', 'v1'))

#     # test out new model server (via REST API calls)
#     tester = funcs["live_tester"].as_step(name='model-tester',
#         params={'addr': deploy.outputs['endpoint'], 'model': f"{DATASET}_v1"},
#         inputs={'table': train.outputs['test_set']})

Overwriting /User/mlrun-github-actions-demo/project/pipelines/train.py


### Add GitHub secret

In [302]:
project.with_secrets("file", "ghtoken.txt")

<mlrun.projects.project.MlrunProject at 0x7f0c7464e850>

### Save Pipeline

In [303]:
project.set_workflow("train", "pipelines/train.py")
project.save()

### Pipeline

In [304]:
run_id = project.run("train", arguments={"existing_model_path":"store://models/gitops-project/train_model#2@139f410a-70b0-489d-a748-593167757909"}, artifact_path=artifact_path, dirty=True, watch=True)

> 2021-07-30 23:16:50,016 [info] Pipeline run id=3d3a8666-a8af-4720-853b-1646dd506e4d, check UI or DB for progress
> 2021-07-30 23:16:50,016 [info] waiting for pipeline run completion


uid,start,state,name,results,artifacts
...dd71d748,Jul 30 23:17:40,completed,test,accuracy-new_model=0.9333333333333333test-error-new_model=0.06666666666666667f1-new_model=0.9137254901960784precision-new_model=0.8888888888888888recall-new_model=0.9629629629629629accuracy-existing_model=0.9333333333333333test-error-existing_model=0.06666666666666667f1-existing_model=0.9137254901960784precision-existing_model=0.8888888888888888recall-existing_model=0.9629629629629629,test_set_preds-new_modeltest_set_preds-existing_model
...d0e1de92,Jul 30 23:17:12,completed,train,best_iteration=1accuracy=0.975609756097561test-error=0.024390243902439025auc-micro=0.9979179060083283auc-weighted=0.9966358284272497f1-score=0.9721739130434783precision_score=0.9743589743589745recall_score=0.9722222222222222,test_setconfusion-matrixfeature-importancesprecision-recall-multiclassroc-multiclassmodeliteration_results
...bc106b31,Jul 30 23:17:13,completed,summary,,histogramsviolinimbalanceimbalance-weights-veccorrelation-matrixcorrelation
...166f0f24,Jul 30 23:16:59,completed,get-data,,iris_dataset
