In [1]:
import mlrun
from sys import path

# Load the MLRun project whose definition lives in the local `conf` directory.

project_name_base = 'widsdb2'
project_path = 'conf'

# Both names refer to the same loaded project object; `project` is the
# handle used by the cells below.
project = widsdb2_proj = mlrun.projects.load_project(project_path, clone=True)




In [8]:
# Register the hub function 'hub://v2_model_tester' in the project under the
# name 'live_tester'; the workflow file looks it up by that name.
livetester = project.set_function('hub://v2_model_tester', 'live_tester')

# Save the project after registering the function.
project.save()

In [9]:
%%writefile {'conf/wf.py' }

from kfp import dsl
import mlrun
from mlrun.platforms import auto_mount


# Function lookup table indexed by function name inside the pipeline.
# NOTE(review): starts empty here; presumably filled in by MLRun before the
# pipeline is built -- confirm.
funcs = dict()

# Output names of the two data-preparation steps.
DATASET = "train_enc"
TST_DATASET = "test_enc"

# Label column passed to the training and tester steps.
LABELS = "diabetes_mellitus"

# Base name of the served model; the pipeline appends a version suffix.
MODEL = "lgbm_model"

# Default raw-data locations used as pipeline argument defaults.
test_src = "v3io:///projects/widsdb2/artifacts/raw_test_data.csv"
train_src = "v3io:///projects/widsdb2/artifacts/raw_train_data.csv"


# Configure function resources and local settings
def init_functions(functions: dict, project=None, secrets=None):
    """Apply the `auto_mount()` modifier to every function in `functions`.

    Args:
        functions: Mapping whose values are function objects exposing
            an `apply()` method; the keys are not used here.
        project: Accepted for interface compatibility; not used.
        secrets: Accepted for interface compatibility; not used.
    """
    for _name, fn in functions.items():
        # A fresh modifier is created for each function, as in the original.
        fn.apply(auto_mount())

# Create a Kubeflow Pipelines pipeline
@dsl.pipeline(
    name="WidsDB2",
    description=(
        "This workflow implements the pipeline for data preprocessing, "
        "training model serving for Widsdb2 dataset \n"
    ),
)
def kfpipeline(source_url=train_src, test_url=test_src):
    # Pipeline arguments:
    #   source_url -- location of the raw training data (defaults to train_src)
    #   test_url   -- location of the raw test data (defaults to test_src)

    # Versioned key under which the trained model is deployed and tested.
    model_key = f"{MODEL}_v1"

    # Step 1: run the 'trdata_prep' handler on the training source;
    # its output is published under the DATASET name.
    prep_step = funcs['prep'].as_step(
        name="prep",
        handler='trdata_prep',
        inputs={'src': source_url},
        outputs=[DATASET],
    )

    # Step 2: run the 'tstdata_prep' handler on the test source;
    # its output is published under the TST_DATASET name.
    # No later step in this pipeline consumes that output.
    test_prep_step = funcs['tstprep'].as_step(
        name="tstprep",
        handler='tstdata_prep',
        inputs={'src': test_url},
        outputs=[TST_DATASET],
    )

    # Step 3: train on the prepared training data set.
    train_step = funcs["train-wids"].as_step(
        name="train-wids",
        params={"label_column": LABELS},
        inputs={"dataset": prep_step.outputs[DATASET]},
        outputs=['model', 'test_set'],
    )

    # Step 4: deploy the trained model through the serving function.
    deploy_step = funcs["lightgbm-serving"].deploy_step(
        models={model_key: train_step.outputs['model']},
    )

    # Step 5: call the deployed endpoint using the test set produced by training.
    tester_step = funcs["live_tester"].as_step(
        name='model-tester',
        params={
            'addr': deploy_step.outputs['endpoint'],
            'model': model_key,
            'label_column': LABELS,
        },
        inputs={'table': train_step.outputs['test_set']},
    )
    
    
           

Overwriting conf/wf.py


In [10]:
# Register the workflow file under the name "infer"; the path 'wf.py' is the
# file written by the %%writefile cell into the `conf` directory.
# NOTE(review): assumes the path is resolved relative to the project
# directory ('conf') -- confirm.
project.set_workflow('infer', 'wf.py')


In [11]:
# Save the project again now that the "infer" workflow has been registered.
project.save()

In [12]:
import os 
from os import environ, path
from mlrun import mlconf
# Display the configured artifact path template (used as the base path for the run below).
mlconf.artifact_path


'v3io:///projects/{{run.project}}/artifacts'

In [13]:
# Base artifact location taken from the MLRun configuration.
pipeline_path = mlconf.artifact_path

# Artifacts of each workflow run go under <base>/pipeline/<uid placeholder>.
# NOTE(review): MLRun examples commonly use '{{workflow.uid}}' as the
# placeholder -- confirm that '{{wf.uid}}' is substituted as intended.
workflow_artifact_path = os.path.join(pipeline_path, "pipeline", '{{wf.uid}}')

# Launch the "infer" workflow and block until it finishes (watch=True).
run_id = project.run(
    'infer',
    arguments={},
    artifact_path=workflow_artifact_path,
    dirty=True,
    watch=True,
)



> 2021-07-01 12:54:43,405 [info] Pipeline run id=c9cf3637-9cd0-418a-b209-acfbd1af64d6, check UI or DB for progress
> 2021-07-01 12:54:43,406 [info] waiting for pipeline run completion


uid,start,state,name,results,artifacts
...5bca5ebb,Jul 01 12:58:36,completed,model-tester,total_tests=20errors=0match=0avg_latency=70011min_latency=25155max_latency=96646,latency
...0bb53866,Jul 01 12:56:25,completed,train-wids,,train_settest_setmodel
...2be09b2e,Jul 01 12:54:56,completed,prep,,train_enc
...58c5daaf,Jul 01 12:54:56,completed,tstprep,,test_enc
