# CloudEdge DataEngineer (Inference Stage)

**Inference Scenarios**

| scenarios | reference app | framework | model/dataset |
| ---- | ---- | ---- | ---- |
| batch-inference-workflow | [scenarios/job-pipeline](https://github.com/peiniliu/inference/tree/dev/vision/classification_and_detection/scenarios/job-pipeline) | tensorflow | resnet/dummy |

## Architecture

In [1]:
import sys
import os
sys.path.insert(0,'../..')

from scanflow.client import ScanflowClient
from scanflow.client import ScanflowTrackerClient
from scanflow.client import ScanflowDeployerClient

In [None]:
from scanflow.tools import env

# Sanity-check the environment this notebook relies on by echoing each setting,
# one value per line, in the order listed below.
# The secret access key is masked so that the credential never ends up in the
# saved (and possibly committed) notebook output; a missing/empty value is still
# printed as-is so that a misconfiguration remains visible.
for _setting in (
    "SCANFLOW_SERVER_URI",
    "SCANFLOW_TRACKER_URI",
    # "SCANFLOW_TRACKER_LOCAL_URI",  # currently not checked
    "MLFLOW_S3_ENDPOINT_URL",
    "AWS_ACCESS_KEY_ID",
    "AWS_SECRET_ACCESS_KEY",
    "DOCKER_REGISTRY",
):
    _value = env.get_env(_setting)
    if _setting == "AWS_SECRET_ACCESS_KEY" and _value:
        _value = "********"
    print(_value)

In [None]:
# Application and team names reused by the cells below.
app_name = "cloudedge_reactive_migration"
team_name = "dataengineer"

# Application folder: it must point to the folder that includes all the
# 'dataengineer' and 'datascience' folders. For cloudedge_reactive_migration
# it is located in /examples/cloudedge_reactive_migration.
app_dir = env.get_env("REACTIVE_MIGRATION_DATAENGINEER_APP_DIR")

# Create the Scanflow client.
# The server address is not passed explicitly: if "SCANFLOW_SERVER_URI" is
# defined, you do not need to provide it.
docker_registry = env.get_env("DOCKER_REGISTRY")
client = ScanflowClient(registry=docker_registry, verbose=True)

## Batch-inference-graph for prediction

### Predictor

In [None]:
# Predictor stages.
# Executor 1 performs the data retrieval from Prometheus; it receives the
# application and team names as its parameters.
executor1 = client.ScanflowExecutor(
    name="data-retrieval",
    mainfile="data-retrieval.py",
    parameters=dict(app_name=app_name, team_name=team_name),
)

# Stage dependencies: none yet, the graph has a single node.
# TODO: define once more stages have been developed
predictor_nodes = [executor1]
predictor_edges = []

# Predictor workflow: batch-inference-reactive-graph.
# The cron expression "*/5 * * * *" reads as "every 5 minutes".
workflow1 = client.ScanflowWorkflow(
    name="batch-inference-reactive-graph",
    nodes=predictor_nodes,
    edges=predictor_edges,
    type="batch",
    cron="*/5 * * * *",
    output_dir="/workflow",
)

### Planner

In [None]:
# Interval trigger configured with a 5-minute period.
trigger = client.ScanflowAgentSensor_IntervalTrigger(minutes=5)

# Extra keyword arguments attached to the sensor.
# NOTE(review): 300 is presumably a number of seconds (300 s = 5 min, matching
# the trigger above) -- confirm against the `reactive_watch_qos` implementation.
sensor_kwargs = dict(frequency=300)

# Custom sensor "reactive_watch_qos", bound to the interval trigger.
sensor = client.ScanflowAgentSensor(
    name="reactive_watch_qos",
    isCustom=True,
    func_name="reactive_watch_qos",
    trigger=trigger,
    kwargs=sensor_kwargs,
)

# Planner agent built from the "planner" template with that single sensor.
planner = client.ScanflowAgent(name="planner", template="planner", sensors=[sensor])

### Compose the Scanflow Application

In [None]:
# Bundle the predictor workflow and the planner agent into a single
# Scanflow application definition.
app_spec = {
    "app_name": app_name,
    "app_dir": app_dir,
    "team_name": team_name,
    "workflows": [workflow1],
    "agents": [planner],
}
app = client.ScanflowApplication(**app_spec)

### DEBUG: show application config

In [None]:
from pprint import pprint

# Convert the application definition with `to_dict()` and display the result;
# a bare assignment shows nothing in the cell output. `dic` stays bound so it
# can still be inspected or reused afterwards.
dic = app.to_dict()
pprint(dic)

### Build the Scanflow Application
- This step builds the Docker images for all the Scanflow executors and uploads them to the container registry (currently hardcoded in the `scanflow` module)

In [None]:
# Scanflow Tracker port handed to the build step.
tracker_port = 32766

# Build the application defined above.
build_app = client.build_ScanflowApplication(app=app, trackerPort=tracker_port)