# CloudEdge DataEngineer (Inference Stage)

**Inference Scenarios**

| scenarios | reference app | framework | model/dataset |
| ---- | ---- | ---- | ---- |
| batch-inference-workflow | [scenarios/job-pipeline](https://github.com/peiniliu/inference/tree/dev/vision/classification_and_detection/scenarios/job-pipeline) | tensorflow | resnet/dummy |

## Architecture

In [15]:
# For local debugging only: never commit these lines uncommented (they contain endpoints and credentials)!
#%env WORKDIR=/your/repository/root/path
#%env REACTIVE_MIGRATION_DATAENGINEER_APP_DIR=examples/cloudedge-reactive-migration/dataengineer
#%env SCANFLOW_SERVER_URI=http://10.17.252.5:32766
#%env SCANFLOW_TRACKER_URI=http://10.17.252.5:32766
#%env MLFLOW_S3_ENDPOINT_URL=http://10.17.252.5:32910
#%env AWS_ACCESS_KEY_ID=admin
#%env AWS_SECRET_ACCESS_KEY=scanflow123
#%env DOCKER_REGISTRY=registry.gitlab.bsc.es/datacentric-computing/cloudskin-project/cloudskin-registry

In [16]:
import sys
import os
sys.path.insert(0,'../..')

from scanflow.client import ScanflowClient
from scanflow.client import ScanflowTrackerClient
from scanflow.client import ScanflowDeployerClient

In [17]:
from scanflow.tools import env

# Sanity check: print, one per line and in a fixed order, the settings this
# notebook looks up through env.get_env.
# The secret access key is masked so that it never ends up in the saved
# notebook output; a set value shows as asterisks, an unset one prints as-is.
_SECRET_SETTINGS = {"AWS_SECRET_ACCESS_KEY"}
for _setting in (
    "SCANFLOW_SERVER_URI",
    "SCANFLOW_TRACKER_URI",
    "MLFLOW_S3_ENDPOINT_URL",
    "AWS_ACCESS_KEY_ID",
    "AWS_SECRET_ACCESS_KEY",
    "DOCKER_REGISTRY",
):
    _value = env.get_env(_setting)
    if _setting in _SECRET_SETTINGS and _value:
        _value = "********"
    print(_value)

In [18]:
# Application identity, reused by the executor parameters and the application below
app_name = "cloudedge-reactive-migration"
team_name = "dataengineer"

# App folder: WORKDIR joined with the application's relative path.
# It must point to the folder including all 'dataengineer' and 'datascience'
# folders for cloudedge-reactive-migration, located in
# examples/cloudedge-reactive-migration
workdir = env.get_env('WORKDIR')
relative_app_dir = env.get_env('REACTIVE_MIGRATION_DATAENGINEER_APP_DIR')
app_dir = os.path.join(workdir, relative_app_dir)
print(app_dir)

# Initialize the Scanflow Client.
# No server URI argument is passed here: per the original note, it does not
# need to be provided when "SCANFLOW_SERVER_URI" is defined.
client = ScanflowClient(
    verbose=True,
    registry=env.get_env("DOCKER_REGISTRY"),
)

## Batch-inference-graph for prediction

### Predictor

In [19]:
# Predictor stages (only Executor 1 is defined in this cell):
# - Executor 1: Data retrieval from Prometheus
# - Executor 2: Data pre-processing
# - Executor 3: QoS Predictor
executor1 = client.ScanflowExecutor(
    name="data-retrieval",
    mainfile="data-retrieval.py",
    # Parameters handed to the executor: the application and team names
    parameters=dict(app_name=app_name, team_name=team_name),
)

# Stage dependencies
# TODO: define them once other stages have been developed
predictor_edges = []

# Predictor workflow: batch-inference-reactive-graph
# TODO: add missing executors and dependencies
predictor_nodes = [executor1]
workflow1 = client.ScanflowWorkflow(
    name="batch-inference-reactive-graph",
    type="batch",
    # "*/5 * * * *" reads as "every 5 minutes" in standard cron syntax
    cron="*/5 * * * *",
    nodes=predictor_nodes,
    edges=predictor_edges,
    output_dir="/workflow",
)

### Planner

In [20]:
# Interval trigger for the sensor, configured with minutes=5
trigger = client.ScanflowAgentSensor_IntervalTrigger(minutes=5)

# Keyword arguments forwarded to the custom sensor function.
# NOTE(review): 300 is presumably seconds, mirroring the 5-minute trigger - confirm.
sensor_kwargs = dict(frequency=300)

# Custom sensor "reactive_watch_qos", driven by the trigger above
sensor = client.ScanflowAgentSensor(
    name="reactive_watch_qos",
    func_name="reactive_watch_qos",
    isCustom=True,
    trigger=trigger,
    kwargs=sensor_kwargs,
)

# Planner agent built from the "planner" template with that single sensor
planner = client.ScanflowAgent(name="planner", template="planner", sensors=[sensor])

### Compose the Scanflow Application

In [21]:
# Bundle the predictor workflow and the planner agent into one Scanflow application
app = client.ScanflowApplication(
    app_name=app_name,
    team_name=team_name,
    app_dir=app_dir,
    agents=[planner],
    workflows=[workflow1],
)

### DEBUG: show application config

In [22]:
# Debug aid: kept as a bare last expression so the notebook shows the returned
# value as the cell output.
app.to_dict()

### Build the Scanflow Application
- This step builds the Docker images for all the Scanflow executors and uploads them to the container registry (currently hardcoded in the `scanflow` module)

In [23]:
# Scanflow Tracker port handed to the build step
tracker_port = 32766

# Build the application defined above; the result is kept for inspection below
build_app = client.build_ScanflowApplication(trackerPort=tracker_port, app=app)

### DEBUG: show built application config

In [24]:
# Debug aid: kept as a bare last expression so the notebook shows the built
# application's config as the cell output.
build_app.to_dict()