In [None]:
# Replace any installed mlrun package with the `development` branch from GitHub.
!pip uninstall -y mlrun
!pip install git+https://github.com/mlrun/mlrun.git@development

In [1]:
# nuclio: ignore
# if the nuclio-jupyter package is not installed run !pip install nuclio-jupyter
import nuclio 

In [2]:
# nuclio build settings for the code in this notebook:
#   - a build command that installs pandas
#     (presumably `-c` records the command for the build only, without running it locally -- TODO confirm)
#   - the base image used for the build (spec.build.baseImage)
%nuclio cmd -c pip install pandas
%nuclio config spec.build.baseImage = "python:3.6-jessie"

%nuclio: setting spec.build.baseImage to 'python:3.6-jessie'


In [3]:
import os

def training(context, p1=1, p2=2):
    """Demo training handler: report the run, then log results, a label and an artifact.

    :param context: run context object; this handler reads its ``name`` and ``uid`` and
                    calls ``logger.info``, ``log_result``, ``set_label`` and ``log_artifact``
    :param p1:      parameter used to derive the results ('accuracy' = p1 * 2, 'loss' = p1 * 3)
    :param p2:      parameter that is only echoed in the printed summary
    """
    # report which run we are in and the parameter values we received
    print('Run: {} (uid={})'.format(context.name, context.uid))
    print('Params: p1={}, p2={}'.format(p1, p2))
    context.logger.info('started training')

    # (a real training step would go here)

    # log the scalar run results: accuracy first, then loss
    for result_key, factor in (('accuracy', 2), ('loss', 3)):
        context.log_result(result_key, p1 * factor)

    # add a label/tag to this run
    context.set_label('category', 'tests')

    # log a simple artifact and label the artifact itself
    artifact_labels = {'framework': 'xgboost'}
    context.log_artifact('model.txt', body=b'abc is 123', labels=artifact_labels)

def validation(context, model):
    """Demo validation handler: print the model input, then log an HTML artifact.

    :param context: run context object; this handler reads its ``name`` and ``uid`` and
                    calls ``logger.info`` and ``log_artifact``
    :param model:   input object exposing a ``url`` attribute and a ``get()`` method
    """
    print('Run: {} (uid={})'.format(context.name, context.uid))

    # show where the model input lives and what it contains
    print(f'file - {model.url}:\n{model.get()}\n')

    context.logger.info('started validation')

    # log a small HTML artifact for this run
    html_body = b'<b> validated </b>'
    context.log_artifact('validation.html', body=html_body, viewer='web-app')

def listfiles(context, path='/'):
    """Print the entries of a directory and return a one-line summary.

    :param context: run context object (not used by this handler)
    :param path:    directory to list, '/' by default
    :return: a string of the form '<path> contain <count> files', where <count> is the
             number of entries returned by ``os.listdir(path)``
    """
    entries = os.listdir(path)
    print(entries)
    entry_count = len(entries)
    return f'{path} contain {entry_count} files'

## Load MLRUN and specify defaults 

In [4]:
# nuclio: end-code
# (end-code marker tells nuclio to stop parsing the notebook from this cell)

# set the mlrun db path through the MLRUN_META_DBPATH environment variable
# (can also be specified in run_start command)
%env MLRUN_META_DBPATH=/User/mlrun

# mlrun APIs used by the cells below
from mlrun import new_function, make_nuclio_job, NewRun
from mlrun.platforms import mount_v3io

env: MLRUN_PACKAGE_PATH=git+https://github.com/mlrun/mlrun.git@development
env: MLRUN_META_DBPATH=/User/mlrun


## Test the code locally
The functions above can be tested locally. Parameters, inputs, and outputs can be specified in the API or in the runspec object.<br>
We create a `runner`, which defines the runtime environment (type, code, image, ..), and `run` tasks/experiments using that runner.<br>
(We use the `local` runner by default; later on we will use a `job` runner, and other runners such as Horovod, Spark, Dask, Nuclio, .. can be used as well.)

In each run we can specify the function, inputs, parameters/hyper-parameters, etc. (check the `RunTemplate` class for details).<br>
In Jupyter, runs print a summary table with metadata and links to data artifacts.

In [5]:
# create a runner with the default settings and run the listfiles handler on /User
runner = new_function()
list_run = runner.run(handler=listfiles, params={'path': '/User'})

[mlrun] 2019-09-07 22:29:32,833 starting run None uid=262749cb18184fffa9f286b1e6e37044
['.bashrc', '.config', '.igz', '.ipynb_checkpoints', '.ipython', '.jupyter', '.kube', '.local', '.pythonlibs', '.viminfo', '.vimrc', 'LICENSE', 'README.md', 'assets', 'demos', 'getting-started', 'horovod', 'igz-tutorials-get.sh', 'mlrun', 'update-tutorials.ipynb', 'v3io', 'welcome.ipynb']



uid,iter,start,state,name,labels,inputs,parameters,results,artifacts
...e37044,0,Sep 07 22:29:33,completed,,kind=handlerowner=iguaziohost=jupyter-rngbsdr6ab-f02en-5dd7cd96db-xfzxr,,path=/User,return=/User contain 22 files,


[mlrun] 2019-09-07 22:29:33,733 run executed, status=created


## Running and linking multiple tasks
In the next example we run two functions, `training` and `validation`, and pass the result from one to the other.<br>
We will see in the 'job' example that linking works even when the tasks run in different processes or containers, or in a workflow.

In [6]:
# run training with p1=5, then pass its 'model.txt' output to validation as the 'model' input
train_run = runner.run(handler=training, params={'p1': 5})
model_path = train_run.output('model.txt')
validation_run = runner.run(handler=validation, inputs={'model': model_path})

[mlrun] 2019-09-07 22:29:33,741 starting run None uid=d4e5f2d13b7140a7bd1b3c11da3958af
[mlrun] 2019-09-07 22:29:33,768 started training
Run:  (uid=d4e5f2d13b7140a7bd1b3c11da3958af)
Params: p1=5, p2=2



uid,iter,start,state,name,labels,inputs,parameters,results,artifacts
...3958af,0,Sep 07 22:29:33,completed,,kind=handlerowner=iguaziohost=jupyter-rngbsdr6ab-f02en-5dd7cd96db-xfzxrcategory=tests,,p1=5,accuracy=10loss=15,model.txt


[mlrun] 2019-09-07 22:29:33,895 run executed, status=created
[mlrun] 2019-09-07 22:29:33,896 starting run None uid=834298b2293e4ceb933c917547e7b81d
[mlrun] 2019-09-07 22:29:33,924 started validation
Run:  (uid=834298b2293e4ceb933c917547e7b81d)
file - model.txt:
b'abc is 123'




uid,iter,start,state,name,labels,inputs,parameters,results,artifacts
...e7b81d,0,Sep 07 22:29:33,completed,,kind=handlerowner=iguaziohost=jupyter-rngbsdr6ab-f02en-5dd7cd96db-xfzxr,model,,,validation.html


[mlrun] 2019-09-07 22:29:34,019 run executed, status=created


## Define cluster jobs and build images 
In order to run in a cluster we need to package our code and dependencies.<br>
The `make_nuclio_job` call will automatically form a `Job` with a list of dependencies and runtime configuration.<br>
You can apply KubeFlow modifiers to configure resources like volumes; `mount_v3io()` adds an iguazio v3io volume (home of the current user) to the job.

The `build_image()` command is optional; it pre-builds all the dependencies so that the runs will be faster. Note that the code and params can be updated per run.

In [None]:
# create a job from the notebook and attach it to the iguazio data fabric (v3io)
job = make_nuclio_job().apply(mount_v3io())

# prepare an image from the dependencies, so we won't need to build the image on every run
job.build_image(image='mlrun/nuctest:latest')

### Run the job on the cluster (build or use pre-built image)
Note that the `listfiles` call returns the same results as in the local run, since the Job shares the same filesystem.<br>
`with_code()` injects the latest code into the job in case we made changes (it doesn't require a new build).

In [8]:
# run the listfiles handler through the job, with the current notebook code injected
job.with_code().run(handler=listfiles, params={'path': '/User'})

[mlrun] 2019-09-07 22:31:24,102 starting run None uid=4b56d99167e14e3294dad35a670ea3d1
[mlrun] 2019-09-07 22:31:24,125 using in-cluster config.
[mlrun] 2019-09-07 22:31:24,126 using in-cluster config.
[mlrun] 2019-09-07 22:31:24,146 Pod mlrun-vrzfx created
....
[mlrun] 2019-09-07 22:31:33,200 starting run None uid=4b56d99167e14e3294dad35a670ea3d1
['.bashrc', '.config', '.igz', '.ipynb_checkpoints', '.ipython', '.jupyter', '.kube', '.local', '.pythonlibs', '.viminfo', '.vimrc', 'LICENSE', 'README.md', 'assets', 'demos', 'getting-started', 'horovod', 'igz-tutorials-get.sh', 'mlrun', 'update-tutorials.ipynb', 'v3io', 'welcome.ipynb']

[mlrun] 2019-09-07 22:31:33,306 run executed, status=created


uid,iter,start,state,name,labels,inputs,parameters,results,artifacts
...0ea3d1,0,Sep 07 22:31:33,completed,,kind=localowner=iguaziohost=mlrun-vrzfx,,path=/User,return=/User contain 22 files,


[mlrun] 2019-09-07 22:31:38,356 run executed, status=created


<mlrun.model.RunObject at 0x7fef9e9f06d8>

In [9]:
# define a run template, specify the artifacts output path, and add a label (can be used for search later)
# run_base is reused by the training and validation job cells below
run_base = NewRun(out_path='/User/mlrun/data').set_label('stage', 'dev')

In [10]:
# run our training task with hyper params (p1 takes the values 2, 6 and 4),
# and select the iteration with max accuracy
run = run_base.copy().with_hyper_params({'p1': [2,6,4]}, selector='max.accuracy')
train_run = job.with_code().run(run, handler=training, name='my-training', params={'p1': 9})
# keep the 'model.txt' output of the training run; it is the validation input in the next cell
# (presumably this resolves to the selected iteration's artifact -- TODO confirm)
model_path = train_run.output('model.txt')

[mlrun] 2019-09-07 22:31:57,923 starting run my-training uid=5a754ea7f4c240688ff4079e68d1eb5a
[mlrun] 2019-09-07 22:31:57,946 using in-cluster config.
[mlrun] 2019-09-07 22:31:57,965 Pod my-training-84n8q created
..
[mlrun] 2019-09-07 22:32:02,010 starting run my-training uid=5a754ea7f4c240688ff4079e68d1eb5a
[mlrun] 2019-09-07 22:32:02,089 started training
Run: my-training (uid=5a754ea7f4c240688ff4079e68d1eb5a-1)
Params: p1=2, p2=2

[mlrun] 2019-09-07 22:32:02,157 run executed, status=created
[mlrun] 2019-09-07 22:32:07,329 using in-cluster config.
[mlrun] 2019-09-07 22:32:07,339 Pod my-training-lftpm created
..
[mlrun] 2019-09-07 22:32:11,626 starting run my-training uid=5a754ea7f4c240688ff4079e68d1eb5a
[mlrun] 2019-09-07 22:32:11,707 started training
Run: my-training (uid=5a754ea7f4c240688ff4079e68d1eb5a-2)
Params: p1=6, p2=2

[mlrun] 2019-09-07 22:32:11,797 run executed, status=created
[mlrun] 2019-09-07 22:32:16,965 using in-cluster config.
[mlrun] 2019-09-07 22:32:16,975 Pod my-tr

uid,iter,start,state,name,labels,inputs,parameters,results,artifacts
...d1eb5a,1,Sep 07 22:32:02,completed,my-training,stage=devkind=localowner=iguaziohost=my-training-84n8qcategory=tests,,p1=2,accuracy=4loss=6,model.txt
...d1eb5a,2,Sep 07 22:32:11,completed,my-training,stage=devkind=localowner=iguaziohost=my-training-lftpmcategory=tests,,p1=6,accuracy=12loss=18,model.txt
...d1eb5a,3,Sep 07 22:32:21,completed,my-training,stage=devkind=localowner=iguaziohost=my-training-p784pcategory=tests,,p1=4,accuracy=8loss=12,model.txt
...d1eb5a,0,Sep 07 22:31:57,completed,my-training,stage=devkind=jobowner=iguazio,,p1=9,best_iteration=2accuracy=12loss=18,model.txtiteration_results.csv


[mlrun] 2019-09-07 22:32:26,755 run executed, status=created


In [11]:
# run validation, using the best model result from the previous step
job.run(run_base, handler=validation, name='my-validation', inputs={'model': model_path})

[mlrun] 2019-09-07 22:32:26,760 starting run my-validation uid=cf4f04956ad44363ba74a8af8840f481
[mlrun] 2019-09-07 22:32:26,781 using in-cluster config.
[mlrun] 2019-09-07 22:32:26,796 Pod my-validation-tkfbz created
..
[mlrun] 2019-09-07 22:32:31,175 starting run my-validation uid=cf4f04956ad44363ba74a8af8840f481
[mlrun] 2019-09-07 22:32:31,268 started validation
Run: my-validation (uid=cf4f04956ad44363ba74a8af8840f481)
file - /User/mlrun/data/2/model.txt:
b'abc is 123'


[mlrun] 2019-09-07 22:32:31,334 run executed, status=created


uid,iter,start,state,name,labels,inputs,parameters,results,artifacts
...40f481,0,Sep 07 22:32:31,completed,my-validation,stage=devkind=localowner=iguaziohost=my-validation-tkfbz,model,,,validation.html


[mlrun] 2019-09-07 22:32:36,491 run executed, status=created


<mlrun.model.RunObject at 0x7fefd06297f0>

In [12]:
# list all jobs (state, start time, type and name) using the mlrun CLI
!mlrun get po 

[mlrun] 2019-09-07 22:32:58,685 using in-cluster config.
state      started          type     name
Running    Sep 07 22:09:23  dask     dask-iguazio-a06adb78-4wxh57
Running    Sep 07 22:25:29  mpijob   ml-2f03ba53-launcher-2q4pm
Running    Sep 07 22:25:27  mpijob   ml-2f03ba53-worker-0
Succeeded  Sep 07 20:57:11  mpijob   ml-5f9218f6-launcher-ph2xb
Succeeded  Sep 07 22:04:11  build    mlrun-build-ktdxw
Succeeded  Sep 07 22:29:55  build    mlrun-build-p2nh6
Succeeded  Sep 07 22:07:53  build    mlrun-build-pxrmm
Succeeded  Sep 07 21:56:06  build    mlrun-build-vr55p
Pending    Sep 04 21:34:18  job      mlrun-clwvx
Succeeded  Sep 07 22:31:24  job      mlrun-vrzfx
Succeeded  Sep 07 19:14:41  mpijob   mpij-f308cbfa-launcher-kl2ct
Succeeded  Sep 07 22:31:57  job      my-training-84n8q
Succeeded  Sep 07 22:32:07  job      my-training-lftpm
Succeeded  Sep 07 22:32:16  job      my-training-p784p
Succeeded  Sep 07 22:32:26  job      my-validation-tkfbz


In [13]:
# check the logs of a single job pod
# NOTE: 'mlrun-vrzfx' is the pod created by the listfiles job in this recorded session;
# replace it with a pod name from your own job listing
!mlrun watch mlrun-vrzfx

[mlrun] 2019-09-07 22:34:02,037 using in-cluster config.

[mlrun] 2019-09-07 22:31:33,200 starting run None uid=4b56d99167e14e3294dad35a670ea3d1
['.bashrc', '.config', '.igz', '.ipynb_checkpoints', '.ipython', '.jupyter', '.kube', '.local', '.pythonlibs', '.viminfo', '.vimrc', 'LICENSE', 'README.md', 'assets', 'demos', 'getting-started', 'horovod', 'igz-tutorials-get.sh', 'mlrun', 'update-tutorials.ipynb', 'v3io', 'welcome.ipynb']

[mlrun] 2019-09-07 22:31:33,306 run executed, status=created
Pod mlrun-vrzfx last status is: succeeded
