# Project management

In [1]:
from os import path, getenv
from mlrun import new_project

# Assemble the project name from its non-empty parts (only one part today)
name_parts = ['properties-management']
project_name = '-'.join(part for part in name_parts if part)

# The project definition lives in ./conf, resolved to an absolute path
project_path = path.abspath('conf')

# Create the project object; init_git=True is forwarded to new_project as-is
project = new_project(project_name, project_path, init_git=True)

print(f'Project path: {project_path}\nProject name: {project_name}')

Project path: /User/igztraining/mlrun/Day-4/conf
Project name: properties-management


In [2]:
from mlrun import run_local, NewTask, mlconf, import_function, mount_v3io

# MLRun DB path or API service URL: keep whatever is already configured,
# otherwise fall back to the default API service address
default_api_url = 'http://mlrun-api:8080'
mlconf.dbpath = mlconf.dbpath or default_api_url

# Target location for storing pipeline artifacts (./jobs as an absolute path)
artifact_path = path.abspath('jobs')

print(f'Artifacts path: {artifact_path}\nMLRun DB path: {mlconf.dbpath}')

Artifacts path: /User/igztraining/mlrun/Day-4/jobs
MLRun DB path: http://mlrun-api:8080


# Add function to project

In [3]:
# Register the function spec under the name 'properties-handler'.
# The YAML path is resolved from the notebook's working directory (like the
# 'conf' and 'jobs' paths above) instead of a hard-coded per-user location.
project.set_function(path.abspath(path.join('modules', 'function.yaml')), name='properties-handler')

<mlrun.runtimes.kubejob.KubejobRuntime at 0x7f943c849b50>

In [4]:
# Display the functions registered on the project; after the previous cell this
# should list the 'properties-handler' entry with its function.yaml url
project.functions

[{'url': '/User/igztraining/mlrun/Day-4/modules/function.yaml',
  'name': 'properties-handler'}]

# Run function as part of a pipeline

In [5]:
import kfp
from kfp import dsl
from mlrun import run_pipeline

In [6]:
%%writefile {path.join(project_path, 'workflow.py')}

from kfp import dsl
from mlrun import mount_v3io

# Project functions keyed by name. Declared here so the module is self-describing;
# MLRun is expected to supply the real function objects when it loads this
# workflow (kfpipeline looks up funcs['properties-handler'] below).
funcs = {}


# Configure function resources and local settings
def init_functions(functions: dict, project=None, secrets=None):
    """Apply the v3io mount to every function in ``functions``.

    ``project`` and ``secrets`` are accepted but not used here.
    """
    for f in functions.values():
        f.apply(mount_v3io())


# The pipeline decorator must sit directly on the pipeline function: it was
# previously attached to init_functions, so the name/description below were
# applied to the wrong function and kfpipeline was left undecorated.
@dsl.pipeline(
    name='Property Management',
    description='Condo reviews'
)
def kfpipeline(
    source_file: str = ""
) -> None:
    # Step 1: run the 'save_property' handler on the source file and expose
    # its 'condo_properties' output to later steps
    save_properties = funcs['properties-handler'].as_step(
        handler='save_property',
        params={"source_file": source_file,
                "target_file": "/User/igztraining/mlrun/data/condos.csv"},
        outputs=['condo_properties'])

    # Step 2: run the 'filter_properties' handler, fed by step 1's output
    # (consuming that output is what orders this step after step 1)
    filter_properties = funcs['properties-handler'].as_step(
        handler='filter_properties',
        params={"city": "SACRAMENTO",
                "target_dataset": "sacramento_condos"},
        inputs={"source_data": save_properties.outputs["condo_properties"]})

Overwriting /User/igztraining/mlrun/Day-4/conf/workflow.py


In [7]:
# Register the workflow file as "main" (the name later passed to project.run).
# 'workflow.py' is the file written into project_path by the %%writefile cell;
# presumably the path is resolved relative to the project folder — confirm.
project.set_workflow('main', 'workflow.py')

In [8]:
# Save the project (with the function and workflow registered above);
# presumably this writes the project spec under project_path — confirm.
project.save()

In [9]:
# Pipeline arguments: the CSV to ingest, resolved from the notebook's working
# directory (same approach as the artifact path below) rather than a
# hard-coded per-user absolute path
arguments = {"source_file": path.abspath(path.join('data', 'demo.csv'))}

# Run the workflow registered as 'main'.
# '{{workflow.uid}}' is passed through literally inside the artifact path;
# presumably the workflow engine substitutes the run id — confirm.
# NOTE(review): dirty=True looks like it permits running with uncommitted
# changes in the project repo — confirm against the MLRun docs.
run_id = project.run(
    'main',
    arguments=arguments,
    artifact_path=path.abspath(path.join('pipeline', '{{workflow.uid}}')),
    dirty=True)

> 2020-09-29 17:57:03,145 [info] using in-cluster config.


> 2020-09-29 17:57:03,475 [info] Pipeline run id=3f57b767-74e8-49e9-8907-be4bc7ca84d4, check UI or DB for progress
