# Part 2: Pipeline

### Demo Overview

- Review file structure

- Create project

- Build Docker image (optional)

- Add functions to project

- Add pipelines to project

- Run pipeline

### File Structure

In [1]:
# Print the ./project directory as an indented tree: `find` lists every path, the two
# `grep -v` filters drop notebook-checkpoint and bytecode-cache entries, and `sed`
# rewrites each leading path component into a " |" / " | - " tree prefix.
!find ./project | grep -v .ipynb_checkpoints | grep -v __pycache__  | sed -e "s/[^-][^\/]*\// |/g" -e "s/|\([^ ]\)/| - \1/"

 | - project
 | | - components
 | | | - get_prep_data.py
 | | | - train_eval_model.py
 | | - pipelines
 | | | - pipeline.py
 | | - project.yaml


### Set Up MLRun Project

In [2]:
import os
from os import path
from mlrun import set_environment, new_project, mlconf

# Configure the default MLRun environment for the "onboarding" project; the call
# returns the project name and the artifact path that the rest of the notebook uses.
project_name, artifact_path = set_environment(project="onboarding")

# The project context is the local "project" directory, resolved to an absolute path.
project_path = os.path.abspath("project")
project = new_project(name=project_name, context=project_path)

# Echo the effective settings (four lines of output in total).
summary_lines = (
    f"Project name: {project_name}\nProject path: {project_path}",
    f"Artifacts path: {artifact_path}\nMLRun DB path: {mlconf.dbpath}",
)
print(*summary_lines, sep="\n")

Project name: onboarding
Project path: /User/igz_repos/igz-sycomp/project
Artifacts path: v3io:///projects/{{run.project}}/artifacts
MLRun DB path: http://mlrun-api:8080


### Build Docker Image (Optional)

In [3]:
# Container image passed to every `project.set_function(...)` call in this notebook.
# Alternative (disabled): an image named "feature-store-image" on a docker registry
# whose host is derived from the IGZ_NAMESPACE_DOMAIN environment variable.
# image = f"docker-registry.{os.getenv('IGZ_NAMESPACE_DOMAIN')}:80/feature-store-image"
image = "mlrun/ml-models"

In [4]:
# Optional one-off image build (only needs to be run once): configure a job
# function whose build installs `storey` on top of the mlrun/ml-models base image.
from mlrun import new_function

build_image = new_function(name="build-image", kind="job")

# NOTE(review): when `image` is left at "mlrun/ml-models", the build target and the
# base image have the same name -- confirm the intended target before enabling deploy.
build_settings = {
    "image": image,
    "base_image": "mlrun/ml-models",
    "commands": ["pip install storey"],
}
build_image.build_config(**build_settings)

# The build itself is left disabled; uncomment the next line to trigger it.
# build_image.deploy(with_mlrun=False)

### Add Functions to Project

`project.set_function(func, name, kind, image)`

In [5]:
# Register the two batch components as "job" functions that run on `image`.
for component_file, function_name in (
    ("components/get_prep_data.py", "get-prep-data"),
    ("components/train_eval_model.py", "train-eval-model"),
):
    project.set_function(
        func=component_file,
        name=function_name,
        kind="job",
        image=image,
    )

# Register the model server from the function hub. This call is kept as the
# cell's final expression so its return value is still displayed as the cell output.
project.set_function(
    func="hub://v2_model_server",
    name="deploy-model",
    kind="nuclio",
    image=image,
)

<mlrun.runtimes.serving.ServingRuntime at 0x7fcd31758310>

### Add Pipelines to Project

`project.set_workflow(name, workflow_path)`

In [6]:
# Register pipelines/pipeline.py as the workflow named "main", then save the project.
# NOTE(review): save() presumably writes project/project.yaml (its contents are shown
# by the next cell) -- confirm against the MLRun version in use.
project.set_workflow(name="main", workflow_path="pipelines/pipeline.py")
project.save()

In [7]:
# Display the project definition file to check the registered functions and workflow.
!cat project/project.yaml

kind: project
metadata:
  name: onboarding
spec:
  functions:
  - url: components/get_prep_data.py
    name: get-prep-data
    kind: job
    image: mlrun/ml-models
  - url: components/train_eval_model.py
    name: train-eval-model
    kind: job
    image: mlrun/ml-models
  - url: hub://v2_model_server
    name: deploy-model
    kind: nuclio
    image: mlrun/ml-models
  workflows:
  - name: main
    path: pipelines/pipeline.py
  artifacts: []
  artifact_path: v3io:///projects/{{run.project}}/artifacts
  source: ''
  subpath: ''
  origin_url: ''
  desired_state: online


### Run Pipeline

Python: `project.run(name, arguments, artifact_path)`

CLI: `mlrun project ./project --run <NAME>`

In [8]:
# Locate the dataset relative to the project directory instead of hard-coding an
# absolute path, so the notebook keeps working when the repository lives elsewhere.
# With the project at <repo>/project this resolves to <repo>/assets/heart.csv, which
# is the same location the original hard-coded string pointed to.
dataset_path = path.join(path.dirname(project_path), "assets", "heart.csv")

# Launch the "main" workflow with the dataset location as its only argument and keep
# the returned run id for later reference.
# NOTE(review): `dirty=True` is passed through unchanged; it presumably allows running
# with uncommitted changes in the project context -- confirm against the MLRun docs.
run_id = project.run(
    name="main",
    arguments={"dataset": dataset_path},
    artifact_path=artifact_path,
    dirty=True
)

> 2021-09-15 18:36:38,841 [info] using in-cluster config.


> 2021-09-15 18:36:39,301 [info] Pipeline run id=c6ded426-416f-4100-b372-ac6780fa5b64, check UI or DB for progress


# Part 3: Model Inference

### Get Model Endpoint

In [9]:
import json
import requests
import numpy as np
import pandas as pd

# Build the in-cluster address of the serving function from the project name and
# the metadata name of the function registered as "deploy-model".
nuclio_host_parts = (project.name, project.func('deploy-model').metadata.name)

# Base address plus the predict route of the "heart_disease_model" model.
url = "".join([
    'http://nuclio-{}-{}:8080'.format(*nuclio_host_parts),
    "/v2/models/heart_disease_model/predict",
])
url

'http://nuclio-onboarding-v2-model-server:8080/v2/models/heart_disease_model/predict'

### Payload for Model

In [10]:
# Three sample feature rows (27 values each) used as the request payload below.
# NOTE(review): the values look already scaled / one-hot encoded (everything is in
# [0, 1]); presumably they match the output of the data-prep component -- confirm.
X_test = [[0.0, 0.0, 1.0, 0.0, 0.5833333333333334, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.9342105263157896, 0.6965811965811965, 0.0, 0.3157894736842104, 0.2272727272727272, 1.0],
          [0.0, 0.0, 1.0, 0.0, 0.6041666666666666, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.2631578947368422, 0.8205128205128205, 1.0, 0.4561403508771928, 1.0, 1.0],
          [0.0, 0.0, 1.0, 0.0, 0.25, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.4736842105263159, 0.3333333333333333, 0.0, 0.7368421052631577, 0.3181818181818182, 0.0]]

# One label per row of X_test, in the same order; displayed at the end of the notebook
# for comparison with the model's outputs.
y_test = [0, 0, 1]

In [11]:
# Wrap the sample rows under the "inputs" key and serialise them to a JSON string
# that is sent as the request body; the string is displayed as the cell output.
request_payload = {"inputs" : X_test}
data = json.dumps(request_payload)
data

'{"inputs": [[0.0, 0.0, 1.0, 0.0, 0.5833333333333334, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.9342105263157896, 0.6965811965811965, 0.0, 0.3157894736842104, 0.2272727272727272, 1.0], [0.0, 0.0, 1.0, 0.0, 0.6041666666666666, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.2631578947368422, 0.8205128205128205, 1.0, 0.4561403508771928, 1.0, 1.0], [0.0, 0.0, 1.0, 0.0, 0.25, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.4736842105263159, 0.3333333333333333, 0.0, 0.7368421052631577, 0.3181818181818182, 0.0]]}'

### Run Model Inference via HTTP

In [12]:
%%time
# POST the JSON payload to the model's predict endpoint and decode the JSON reply.
# - timeout: without one, `requests` can wait indefinitely on an unresponsive
#   endpoint and leave the cell hanging.
# - raise_for_status(): report an HTTP error status directly instead of failing
#   later while decoding or reading an unexpected response body.
response = requests.post(url=url, data=data, timeout=30)
response.raise_for_status()
predictions = response.json()
predictions

CPU times: user 8.49 ms, sys: 163 µs, total: 8.66 ms
Wall time: 19.3 ms


{'id': 'effd4ea2-b608-45c7-9e2c-fa411b72aacd',
 'model_name': 'heart_disease_model',
 'outputs': [0, 0, 1]}

In [13]:
# Display the expected labels for comparison with the "outputs" field returned by the model.
y_test

[0, 0, 1]