In [1]:
import mlrun
from sys import path

# Project settings: the project definition is loaded from the local 'conf' directory.
project_path = 'conf'
project_name_base = 'widsdb2'

# Load the project once and expose it under both names that later cells use.
project = widsdb2_proj = mlrun.projects.load_project(project_path, clone=True)




In [2]:
# Show the names of the functions registered on the loaded project.
project = widsdb2_proj
names = [
    func_spec.get('name')
    for func_spec in widsdb2_proj.functions
]
print(names)

['prep', 'train-wids', 'describe', 'test-classifier', 'tstprep', 'lightgbm-serving', 'live_tester', 'qnaserving']


In [8]:
# Register the hub function 'hub://v2_model_tester' on the project under the name
# 'live_tester'; the workflow written to conf/wf.py references funcs["live_tester"].
# NOTE(review): this cell's execution count (8) is out of sequence with the cells
# around it — confirm the notebook still works with Restart Kernel -> Run All.
livetester = project.set_function('hub://v2_model_tester', 'live_tester')

# Save the project so the newly registered function is kept with it.
project.save()

In [3]:
%%writefile {'conf/wf.py' }

from kfp import dsl
import mlrun
from mlrun.platforms import auto_mount


# Function registry indexed by name inside kfpipeline. It is empty here and is
# presumably populated from outside this file before the pipeline is built
# (by the MLRun project runner — TODO confirm).
funcs = {}
DATASET = 'train_enc'  # output key of the "prep" step, fed to the training step
TST_DATASET = 'test_enc'  # output key of the "tstprep" step
LABELS =  'diabetes_mellitus'  # label column passed to the training and tester steps
MODEL = 'lgbm_model'  # base model name; the pipeline serves it as f"{MODEL}_v1"
# Default source URLs for the test and training steps (kfpipeline parameter defaults).
test_src = 'v3io:///projects/widsdb2/artifacts/raw_test_data.csv'
train_src =  'v3io:///projects/widsdb2/artifacts/raw_train_data.csv'


# Configure function resources and local settings
def init_functions(functions: dict, project=None, secrets=None):
    """Apply a freshly created ``auto_mount()`` modifier to each function.

    Args:
        functions: mapping whose values expose an ``apply(modifier)`` method.
        project: accepted for interface compatibility; not used here.
        secrets: accepted for interface compatibility; not used here.

    Returns:
        None. Each value in ``functions`` has ``apply`` called on it once.
    """
    for runtime in functions.values():
        # A new modifier is built per function, exactly one call per entry.
        mount_modifier = auto_mount()
        runtime.apply(mount_modifier)

# Create a Kubeflow Pipelines pipeline
@dsl.pipeline(
    name="WidsDB2",
    description="This workflow implements the pipeline for data preprocessing, training model "
                "serving for Widsdb2 dataset \n",
)
def kfpipeline(source_url=train_src, test_url=test_src):
    # Pipeline layout (steps are created in this order):
    #   1. prep             - prepares the training data from `source_url`
    #   2. tstprep          - prepares the test data from `test_url`
    #   3. train-wids       - trains on the output of step 1
    #   4. lightgbm-serving - deploys the trained model
    #   5. qnaserving       - deploys a model loaded from a fixed pickle path
    #   6. live_tester      - calls the endpoint from step 4 with the training test set

    # Name under which the trained model is served and later tested.
    served_model = f"{MODEL}_v1"

    # 1. Prepare the training data set
    prep_step = funcs['prep'].as_step(
        name="prep",
        handler='trdata_prep',
        inputs={'src': source_url},
        outputs=[DATASET],
    )

    # 2. Prepare the test data set (no later step in this pipeline reads its output)
    funcs['tstprep'].as_step(
        name="tstprep",
        handler='tstdata_prep',
        inputs={'src': test_url},
        outputs=[TST_DATASET],
    )

    # 3. Train a model on the prepared training data
    train_step = funcs["train-wids"].as_step(
        name="train-wids",
        params={"label_column": LABELS},
        inputs={"dataset": prep_step.outputs[DATASET]},
        outputs=['model', 'test_set'],
    )

    # 4. Deploy the trained model as a serverless function
    serving_step = funcs["lightgbm-serving"].deploy_step(
        models={served_model: train_step.outputs['model']},
    )

    # 5. Deploy the Q&A serving function with its pickled model
    funcs["qnaserving"].deploy_step(
        models={"bertqa_model_v1": '/v3io/projects/widsdb2/util/qa.pkl'},
    )

    # 6. Test out the new model server (via REST API calls)
    tester_params = {
        'addr': serving_step.outputs['endpoint'],
        'model': served_model,
        'label_column': LABELS,
    }
    funcs["live_tester"].as_step(
        name='model-tester',
        params=tester_params,
        inputs={'table': train_step.outputs['test_set']},
    )
    
    
           

Overwriting conf/wf.py


In [4]:
# Register the workflow file as "infer"; the same name is passed to project.run
# further down to launch it.
# NOTE(review): the file was written to conf/wf.py, so 'wf.py' is presumably
# resolved relative to the project directory 'conf' — confirm.
project.set_workflow('infer', 'wf.py')


In [5]:
# Save the project again now that the "infer" workflow has been registered on it.
project.save()

In [6]:
# NOTE(review): `path` was imported from `sys` in the first cell; the import below
# rebinds the same name to `os.path`. Neither `path` nor `environ` is used in the
# cells shown here.
import os 
from os import environ, path
from mlrun import mlconf
# Display the configured artifact path template.
mlconf.artifact_path


'v3io:///projects/{{run.project}}/artifacts'

In [7]:
# Base artifact location, taken from the MLRun configuration.
pipeline_path = mlconf.artifact_path

# Launch the registered "infer" workflow and wait for it to finish.
# The artifact path ends with the '{{workflow.uid}}' placeholder, the documented
# workflow-level variable for the run's unique id, so each pipeline run writes its
# artifacts under a separate directory. The previous '{{wf.uid}}' spelling is not a
# documented variable name.
run_id = project.run(
    'infer',
    arguments={},
    artifact_path=os.path.join(pipeline_path, "pipeline", '{{workflow.uid}}'),
    dirty=True,
    watch=True)



> 2021-07-06 12:48:57,074 [info] using in-cluster config.


> 2021-07-06 12:48:58,093 [info] Pipeline run id=e34aa5d6-ba4e-427b-bbb2-a569fab7b224, check UI or DB for progress
> 2021-07-06 12:48:58,094 [info] waiting for pipeline run completion


ValueError: Value must have type '<class 'int'>'