# CloudEdge DataEngineer (Inference Stage)

**Inference Scenarios**

| scenarios | reference app | framework | model/dataset |
| ---- | ---- | ---- | ---- |
| batch-inference-workflow | [scenarios/job-pipeline](https://github.com/peiniliu/inference/tree/dev/vision/classification_and_detection/scenarios/job-pipeline) | tensorflow | resnet/dumy |

## Architecture

Make sure to set these environment variables in your session with the proper values. All of them are mandatory except:
- `DOCKER_REGISTRY`: if you plan to push the images to a private registry
- `DOCKER_TAG`: if you don't want to leave the default `latest` tag
- `DOCKER_REGISTRY_USERNAME`: if your private registry requires authentication
- `DOCKER_REGISTRY_PASSWORD`: if your private registry requires authentication

In [None]:
# # Only for debug purposes, don't leave them enabled in the repository!!!
# %env WORKDIR=/root/cloudskin/data-connector
# %env KUBECONFIG_PATH=/root/.kube/config
# %env REACTIVE_MIGRATION_DATAENGINEER_APP_DIR=examples/cloudedge-reactive-migration/dataengineer
# %env SCANFLOW_SERVER_URI=http://10.0.26.8:32002
# %env SCANFLOW_TRACKER_URI=http://10.0.26.8:32002
# %env MLFLOW_S3_ENDPOINT_URL=http://10.0.26.8:32000
# # PostgreSQL URI with credentials
# %env SCANFLOW_TRACKER_STORAGE=postgresql://postgres:scanflow123@postgresql.scanflow-server/scanflow
# # MinIO API endpoint, not console!
# %env AWS_ACCESS_KEY_ID=admin
# %env AWS_SECRET_ACCESS_KEY=scanflow123
# %env DOCKER_REGISTRY=registry.gitlab.bsc.es/datacentric-computing/cloudskin-project/cloudskin-registry
# # If you use invalid characters for a tag, Scanflow will replace them with '-'
# %env DOCKER_TAG=feat/reactive-migration
# %env DOCKER_REGISTRY_USERNAME=cloudskin-scanflow-builds
# %env DOCKER_REGISTRY_PASSWORD=fake-password
# %env SCANFLOW_APP_NAME=cloudedge-migration-experiment
# %env SCANFLOW_TEAM_NAME=dataengineer
# # Set this to enable the deployment cells; CI pipelines leave it unset so they don't deploy anything
# %env LOCAL_DEPLOY=1

## Pre-run cleanup

Make sure that the experiment isn't already running by removing its namespace

In [2]:
import sys
import os

# Make sure "scanflow" path is added in available module paths.
# The entry is relative ('../..'), so it is resolved against the directory
# the notebook kernel is running from.
sys.path.insert(0,'../..')

Let's define some useful Kubernetes client functions:

In [4]:
from kubernetes import client
from kubernetes import config
from kubernetes.client.rest import ApiException
from kubernetes.stream import stream
from scanflow.tools import env
import tarfile
import os
import subprocess
from time import time, sleep
import yaml


def delete_namespace_and_wait(client: client.CoreV1Api = None, namespace:str = None, timeout:int = 300):
    """
    Deletes a namespace and waits until its deletion is fully terminated.

    After requesting the deletion, the namespace is polled every 5 seconds
    until the API answers with a 404 or `timeout` seconds have elapsed.
    Kubernetes API errors are reported on stdout and are not propagated.

    Parameters:
    - client: client.CoreV1Api - A Kubernetes API client; locally initialized if not provided
    - namespace: str - The name of the namespace to delete
    - timeout: int - Time to wait in seconds before giving up (default: 300)
    """
    if client is None:
        # The `client` parameter shadows the `kubernetes.client` module inside
        # this function, so the module is re-imported under a different name.
        from kubernetes import client as k8s_client
        client = k8s_client.CoreV1Api()

    try:
        # Delete the namespace
        client.delete_namespace(name=namespace)
        # Wait for the namespace to be completely deleted
        start_time = time()
        while True:
            try:
                # Try fetching the namespace; success means it still exists
                client.read_namespace(name=namespace)
                print(f"Namespace '{namespace}' is still being deleted...")
            except ApiException as e:
                if e.status == 404:
                    # Namespace is deleted, exit loop
                    print(f"Namespace '{namespace}' has been successfully deleted.")
                    break
                # Any other API error is re-raised and reported by the outer handler
                print(f"Error occurred: {e}")
                raise
            # Check if timeout is reached
            if time() - start_time > timeout:
                print(f"Timeout reached: Namespace '{namespace}' still exists after {timeout} seconds.")
                break

            # Wait for some time before checking again
            sleep(5)

    except ApiException as e:
        print(f"Failed to delete namespace '{namespace}': {e}")


def deploy_pod_and_wait_for_completion(client: client.CoreV1Api = None, yaml_file: str = None, namespace: str = "default", timeout: int = 300) -> None:
    """
    Deploy a pod from a YAML manifest and wait until it reaches the Completed state.

    The pod phase is polled every 5 seconds until it is 'Succeeded' or
    'Failed', or until `timeout` seconds have elapsed. Kubernetes API errors
    are reported on stdout and are not propagated.

    Parameters:
    - client: client.CoreV1Api - Kubernetes API client; locally initialized if not provided
    - yaml_file: str - Path to the YAML file containing the pod manifest;
      the process exits with status 1 if it is not provided
    - namespace: str - Kubernetes namespace (default: "default")
    - timeout: int - Time to wait in seconds before giving up (default: 300)
    """

    if client is None:
        # The `client` parameter shadows the `kubernetes.client` module inside
        # this function, so the module is re-imported under a different name.
        from kubernetes import client as k8s_client
        client = k8s_client.CoreV1Api()

    # Load the YAML file
    if not yaml_file:
        print("Missing YAML file! Please make sure to provide a valid YAML file path")
        sys.exit(1)

    with open(yaml_file, 'r') as f:
        pod_manifest = yaml.safe_load(f)

    # Extract pod name from the manifest
    pod_name = pod_manifest['metadata']['name']

    try:
        # Create the pod
        print(f"Creating pod: {pod_name} in namespace: {namespace}")
        client.create_namespaced_pod(namespace=namespace, body=pod_manifest)

        # Wait for the pod to reach Completed state
        start_time = time()
        while True:
            try:
                # Get the pod's current status
                pod = client.read_namespaced_pod(name=pod_name, namespace=namespace)
                pod_phase = pod.status.phase
                print(f"Pod '{pod_name}' is currently in phase: {pod_phase}")

                if pod_phase == "Succeeded":
                    print(f"Pod '{pod_name}' has completed successfully (Succeeded).")
                    break
                elif pod_phase == "Failed":
                    print(f"Pod '{pod_name}' has failed.")
                    break

            except ApiException as e:
                # Re-raised so the outer handler reports it and stops polling
                print(f"Error fetching pod status: {e}")
                raise

            # Check if timeout is reached
            if time() - start_time > timeout:
                print(f"Timeout reached: Pod '{pod_name}' is not in Completed state after {timeout} seconds.")
                break

            # Wait for a few seconds before checking again
            sleep(5)

    except ApiException as e:
        print(f"Failed to create pod '{pod_name}': {e}")


def deploy_pod_and_wait(client: client.CoreV1Api = None, yaml_file: str = None, namespace: str = "default", timeout: int = 300) -> None:
    """
    Deploys a pod using a YAML manifest and waits until its state is 'Running'.

    The pod phase is polled every 5 seconds until it is 'Running' or 'Failed',
    or until `timeout` seconds have elapsed. Kubernetes API errors are
    reported on stdout and are not propagated.

    Parameters:
    - client: client.CoreV1Api - Kubernetes API client; locally initialized if not provided
    - yaml_file: str - Path to the YAML file containing the pod manifest.
    - namespace: str - Kubernetes namespace (default: 'default').
    - timeout: int - Time to wait in seconds before timing out (default: 300).
    """

    if client is None:
        # Honour the documented default: build a client when none is given.
        # The `client` parameter shadows the `kubernetes.client` module inside
        # this function, so the module is re-imported under a different name.
        from kubernetes import client as k8s_client
        client = k8s_client.CoreV1Api()

    # Load YAML file
    with open(yaml_file, 'r') as f:
        pod_manifest = yaml.safe_load(f)

    # Extract pod name from manifest
    pod_name = pod_manifest['metadata']['name']

    try:
        # Create the pod
        print(f"Creating pod: {pod_name} in namespace: {namespace}")
        client.create_namespaced_pod(namespace=namespace, body=pod_manifest)

        # Wait for the pod to reach Running state
        start_time = time()
        while True:
            try:
                # Get the pod's current status
                pod = client.read_namespaced_pod(name=pod_name, namespace=namespace)
                pod_phase = pod.status.phase
                print(f"Pod '{pod_name}' is currently in phase: {pod_phase}")

                if pod_phase == "Running":
                    print(f"Pod '{pod_name}' is now in Running state.")
                    break
                elif pod_phase == "Failed":
                    print(f"Pod '{pod_name}' has failed to start.")
                    break

            except ApiException as e:
                # Re-raised so the outer handler reports it and stops polling
                print(f"Error fetching pod status: {e}")
                raise

            # Check if timeout is reached
            if time() - start_time > timeout:
                print(f"Timeout reached: Pod '{pod_name}' is not in Running state after {timeout} seconds.")
                break

            # Wait for a few seconds before checking again
            sleep(5)

    except ApiException as e:
        print(f"Failed to create pod '{pod_name}': {e}")


def check_if_object_exists_and_ready(
    client: client.CoreV1Api = None, # Kubernetes API client; locally initialized if not provided
    object_type: str = "namespace",  # Type of Kubernetes object: 'namespace' or 'persistentVolumeClaim'
    name: str = "default",  # Name of the Kubernetes object (namespace or PVC)
    namespace: str = None  # Namespace where the object is located (only for PVC)
) -> bool:
    """
    Checks if a Kubernetes object (namespace or persistentVolumeClaim) exists and is ready.

    A namespace is ready when its phase is 'Active'; a persistentVolumeClaim
    is ready when its phase is 'Bound'.

    Parameters:
    - client: client.CoreV1Api - Kubernetes API client; locally initialized if not provided.
    - object_type: str - Type of Kubernetes object ('namespace' or 'persistentVolumeClaim').
    - name: str - Name of the Kubernetes object.
    - namespace: str - Namespace where the object is located (only relevant for PVCs).

    Returns:
    - bool: True if the object exists and is ready, False otherwise
      (including when the API call fails).

    Raises:
    - ValueError: if `object_type` is not supported, or if `namespace` is
      missing for a persistentVolumeClaim check.
    """

    # Initialize API clients
    if client is None:
        # The `client` parameter shadows the `kubernetes.client` module inside
        # this function, so the module is re-imported under a different name.
        from kubernetes import client as k8s_client
        client = k8s_client.CoreV1Api()

    try:
        if object_type == "namespace":
            # Check if the namespace exists
            print(f"Checking if namespace '{name}' exists...")
            namespace_obj = client.read_namespace(name=name)
            if namespace_obj.status.phase == "Active":
                print(f"Namespace '{name}' exists and is Active.")
                return True
            else:
                print(f"Namespace '{name}' is not Active.")
                return False

        elif object_type == "persistentVolumeClaim":
            if namespace is None:
                raise ValueError("Namespace must be specified for persistentVolumeClaim check.")

            # Check if the PVC exists and is bound
            print(f"Checking if persistentVolumeClaim '{name}' exists in namespace '{namespace}'...")
            pvc_obj = client.read_namespaced_persistent_volume_claim(name=name, namespace=namespace)
            if pvc_obj.status.phase == "Bound":
                print(f"PersistentVolumeClaim '{name}' is Bound and ready.")
                return True
            else:
                print(f"PersistentVolumeClaim '{name}' is not in Bound state.")
                return False

        else:
            raise ValueError(f"Unsupported object type '{object_type}'. Use 'namespace' or 'persistentVolumeClaim'.")

    except ApiException as e:
        # A missing object (404) and any other API failure both mean "not ready"
        if e.status == 404:
            print(f"{object_type.capitalize()} '{name}' not found.")
        else:
            print(f"Error fetching {object_type} status: {e}")
        return False


def create_tarball(source_dir: str, tarball_path: str) -> None:
    """
    Creates a gzip-compressed tarball archive of the specified directory.

    The directory is stored under its own name at the root of the archive
    (e.g. archiving '/a/b/pkg' yields members 'pkg', 'pkg/...').

    Parameters:
    - source_dir: str - Path to the local directory to be archived.
    - tarball_path: str - Path where the tarball will be saved.
    """
    # Normalize first: os.path.basename('/a/b/pkg/') is '', which would drop
    # the top-level folder from the archive when the path has a trailing slash.
    top_level_name = os.path.basename(os.path.normpath(source_dir))
    with tarfile.open(tarball_path, "w:gz") as tar:
        tar.add(source_dir, arcname=top_level_name)
    print(f"Created tarball: {tarball_path}")


def copy_to_pod(client: client.CoreV1Api, namespace: str, pod_name: str, container_name: str, tarball_file: str, target_path: str) -> None:
    """
    Streams a local tarball into a Kubernetes pod and extracts it there.

    Runs `tar xvf - -C <target_path>` inside the container through the exec
    API and feeds it the tarball on stdin. Any stdout/stderr produced by the
    remote command is printed.

    Parameters:
    - client: client.CoreV1Api - Kubernetes API client.
    - namespace: str - Namespace where the pod is located.
    - pod_name: str - Name of the pod to copy files into.
    - container_name: str - Pod's container where to copy the files
    - tarball_file: str - Path to the local tarball to be sent.
    - target_path: str - Path in the pod where the tarball will be extracted.
    """
    # Exec command that unpacks the archive received on stdin
    exec_command = ['tar', 'xvf', '-', '-C', target_path]
    stdout_data = ""
    stderr_data = ""

    # Open the tar file as binary
    with open(tarball_file, 'rb') as tar_file:
        resp = stream(
            client.connect_get_namespaced_pod_exec,
            pod_name,
            namespace,
            command=exec_command,
            container=container_name,
            stderr=True, stdin=True, stdout=True, tty=False,
            _preload_content=False
        )
        try:
            # Send the tarball over the stream
            while True:
                data = tar_file.read(1024)
                if not data:
                    break
                resp.write_stdin(data)

            # Collect the remote output BEFORE closing: once the stream is
            # closed nothing more can be received. Each wait is bounded so a
            # silent remote process cannot block this call forever.
            while resp.is_open():
                resp.update(timeout=1)
                out_chunk = resp.read_stdout(timeout=0)
                err_chunk = resp.read_stderr(timeout=0)
                if not out_chunk and not err_chunk:
                    break
                stdout_data += out_chunk or ""
                stderr_data += err_chunk or ""
        finally:
            # Always release the websocket, even if sending failed
            resp.close()

    # Display the captured output
    print("Tarball copied to pod and extracted successfully")
    if stdout_data:
        print("STDOUT:", stdout_data)
    if stderr_data:
        print("STDERR:", stderr_data)


def copy_local_folder_to_pod(client: client.CoreV1Api, local_folder:str, namespace:str, pod_name:str, container_name:str, target_path:str) -> None:
    """
    Copy the content of a local folder to a pod

    The folder is packed into a temporary tarball, streamed into the pod and
    extracted at `target_path`. The temporary tarball is always removed,
    even when the copy fails.

    Parameters:
    - client: client.CoreV1Api - Kubernetes API client
    - local_folder: str - Path to the local directory to be copied.
    - namespace: str - Namespace where the pod is located.
    - pod_name: str - Name of the pod to copy files into.
    - container_name: str - Pod's container where to copy the files
    - target_path: str - Path in the pod where the files will be copied.
    """
    import tempfile

    # Step 1: Create a tarball of the local folder. A unique temporary file is
    # used so concurrent or repeated runs never overwrite each other's archive.
    fd, tarball_path = tempfile.mkstemp(prefix="folder-", suffix=".tar.gz")
    os.close(fd)
    try:
        create_tarball(local_folder, tarball_path)

        # Step 2: Copy the tarball into the pod and extract it
        copy_to_pod(client, namespace, pod_name, container_name, tarball_path, target_path)
    finally:
        # Clean up the tarball whether or not the copy succeeded
        os.remove(tarball_path)


In [None]:
# Initialize kube config and client
# (`kube_client` is reused by the later "copy scanflow module" cell)
config.load_kube_config(config_file=env.get_env("KUBECONFIG_PATH"))
kube_client = client.CoreV1Api()

# Look for all available namespaces
namespaces = kube_client.list_namespace()
# Compose the expected namespace that Scanflow creates based on app_name and team_name
# (`environment_namespace` is also reused by the later "copy scanflow module" cell)
environment_namespace = f"scanflow-{env.get_env('SCANFLOW_APP_NAME')}-{env.get_env('SCANFLOW_TEAM_NAME')}"

# Remove the namespace if it exists; the call blocks until the namespace is
# gone or the helper's default timeout (300 seconds) expires
for namespace in namespaces.items:
    if environment_namespace == namespace.metadata.name:
        delete_namespace_and_wait(client=kube_client, namespace=environment_namespace)

In [None]:
# Also remove any pre-built docker image
import docker

repository_prefix = f"{env.get_env('DOCKER_REGISTRY')}/{env.get_env('SCANFLOW_APP_NAME')}-{env.get_env('SCANFLOW_TEAM_NAME')}"

docker_client = docker.DockerClient()

# - First remove any unused container
print("Purging containers...")
docker_client.containers.prune()

# - Then prune any image that matches the repository_prefix
print(f"Purging docker tags starting with {repository_prefix}...")
for docker_image in docker_client.images.list():
    for tag in docker_image.tags:
        if tag.startswith(repository_prefix):
            docker_client.images.remove(tag)
            break

## ScanflowClient initialization

In [2]:
from scanflow.client import ScanflowClient
from scanflow.client import ScanflowDeployerClient

### Debug: available environment variables

In [None]:
# Print the configuration values this notebook will use, one per line and in
# a fixed order. Secret values are masked so that credentials are never stored
# in the notebook output (which may end up committed to the repository).
for debug_var in (
    "SCANFLOW_SERVER_URI",
    "SCANFLOW_TRACKER_URI",
    "MLFLOW_S3_ENDPOINT_URL",
    "AWS_ACCESS_KEY_ID",
    "AWS_SECRET_ACCESS_KEY",
    "DOCKER_REGISTRY",
    "DOCKER_TAG",
):
    debug_value = env.get_env(debug_var)
    if debug_var == "AWS_SECRET_ACCESS_KEY" and debug_value:
        # Only reveal that the secret is set, not its content
        debug_value = "********"
    print(debug_value)

In [None]:
# App folder - Must point to the folder including all 'dataengineer' and 'datascience' folders
# for cloudedge-reactive-migration, allocated in examples/cloudedge-reactive-migration
# (built as WORKDIR joined with REACTIVE_MIGRATION_DATAENGINEER_APP_DIR)
app_dir = os.path.join(env.get_env('WORKDIR'), env.get_env('REACTIVE_MIGRATION_DATAENGINEER_APP_DIR'))
print(app_dir)
# Application and team names; reused by the following cells to build the
# executors and the ScanflowApplication
app_name = env.get_env("SCANFLOW_APP_NAME")
team_name = env.get_env("SCANFLOW_TEAM_NAME")

# Initialize the Scanflow Client
scanflow_client = ScanflowClient(
    #if you defined "SCANFLOW_SERVER_URI", you dont need to provide this
    # NOTE(review): the comment above seems to refer to a server URI argument
    # that is not passed here; only the registry is provided - confirm
    registry=env.get_env("DOCKER_REGISTRY"),
    verbose=True)

## Batch-inference-graph for prediction

### Predictor

In [5]:
# Predictor stages
# - Executor 1: Data retrieval from Prometheus
# - Executor 2: Data pre-processing + QoS Predictor

# Define common variables for the Application stages
# Both executors share the same CSV root folder: executor 1 receives it as
# 'output_path' and executor 2 as 'csv_path'
output_dir = "/workflow"
csv_root_path = os.path.join(output_dir, f"{app_name}-{team_name}")

executor_1 = scanflow_client.ScanflowExecutor(
    name="data-retrieval",
    mainfile="data-retrieval.py",
    dockerfile="Dockerfile_data_retrieval_no_buildkit",
    parameters={
        'app_name': app_name,
        'team_name': team_name,
        'output_path': csv_root_path,
        'promcsv_config': "/app/data-retrieval/promql_queries.json" # Config file already included in the Docker image
        #'promcsv_config': "/workflow/promql_queries.json" # Config file for debug purposes, manually included in the workflow PVC
    }
)

executor_2 = scanflow_client.ScanflowExecutor(
    name="qos-upload",
    mainfile="qos-upload.py",
    dockerfile="Dockerfile_qos_upload_no_buildkit",
    parameters={
        'name': "QoS preprocessing and upload",
        'app_name': app_name,
        'team_name': team_name,
        'csv_path': csv_root_path, # We expect each experiment run to store results in "${csv_root_path}/run_at_${execution_timestamp}" subfolder
        'csv_sep': ";",
        'purge_local_results': True # This deletes the results CSVs once QoS has been uploaded to the MLflow experiment run
    }
)

# Stages dependencies
# TODO: define them once other stages have been developed
# The names below refer to the `name` of executor_1 and executor_2
# (presumably 'qos-upload' runs after 'data-retrieval' - confirm the
# dependee/depender semantics in the scanflow module)
dependency_1 = scanflow_client.ScanflowDependency(
    dependee='data-retrieval',
    depender='qos-upload'
)

# Predictor workflow: batch-inference-reactive-graph
# TODO: add missing executors and dependencies
# The cron expression "*/5 * * * *" schedules a run every 5 minutes
workflow_1 = scanflow_client.ScanflowWorkflow(
    name="batch-inference-reactive-graph",
    nodes=[executor_1, executor_2],
    edges=[dependency_1],
    type="batch",
    cron="*/5 * * * *",
    output_dir=output_dir,
    image_pull_secrets=["cloudskin-registry"] # Required for Workflow templates
)

### Planner

In [6]:
# Planner agent: a single custom sensor fired by a 5-minute interval trigger
trigger = scanflow_client.ScanflowAgentSensor_IntervalTrigger(minutes=5)
sensor = scanflow_client.ScanflowAgentSensor(
    name="reactive_watch_qos",
    isCustom=True,
    func_name="reactive_watch_qos",
    trigger=trigger,
    kwargs={
        # presumably the same 5-minute period expressed in seconds - confirm
        # against the 'reactive_watch_qos' sensor implementation
        'frequency': 300
    }
)
planner = scanflow_client.ScanflowAgent(
    name="planner",
    dockerfile="Dockerfile_scanflow_planner",
    template="planner",
    sensors=[sensor],
    image_pull_secret="cloudskin-registry" # Required when deploying to Kubernetes cluster (created during deployment)
)

### Compose the Scanflow Application

In [7]:
# Bundle the predictor workflow and the planner agent into one application
# identified by the app/team names read from the environment
app = scanflow_client.ScanflowApplication(
    app_name=app_name,
    app_dir=app_dir,
    team_name=team_name,
    workflows=[workflow_1],
    agents=[planner]
)

### DEBUG: show application config

In [8]:
#app.to_dict()

### Build the Scanflow Application
- This step builds the Docker images for all the Scanflow executors and uploads them to the container registry (currently hardcoded in the `scanflow` module)

In [None]:
# Define the Scanflow Tracker Port (32767)
build_app = scanflow_client.build_ScanflowApplication(
    app=app,
    trackerPort=32767,
    image_pull_secret="cloudskin-registry" # Required when deploying to Kubernetes (created during deployment)
)

### DEBUG: show built application config

In [10]:
#build_app.to_dict()

### Create a ScanflowDeployerClient

This client creates the required environment for Scanflow to run the pipelines in a Kubernetes cluster based on the built application. It can:

- Create an environment for the Scanflow application within its own namespace
- Deploy a local Scanflow Tracker
- Run the application as an Argo Workflow

In [None]:
# Initialize the deployer client
if env.get_env("LOCAL_DEPLOY"):
    deployer_client = ScanflowDeployerClient(
        user_type="local",
        deployer="argo",
        k8s_config_file=env.get_env("KUBECONFIG_PATH")
    )

### Deploy the ScanflowEnvironment
This creates:
- A namespace for the application
- A Deployment for the local scanflow tracker
- A Deployment for all the agents (in this case there's only the planner)
  - The planner doesn't currently include the `scanflow` module, so it must be copied into the planner's PVC so that the container finds it at the `/scanflow/scanflow/scanflow` path

Go to your Kubernetes cluster and check that both the tracker and planner pods are Running without errors in the application namespace (`scanflow-<SCANFLOW_APP_NAME>-<SCANFLOW_TEAM_NAME>`, e.g. `scanflow-cloudedge-reactive-migration-dataengineer`).

In [12]:
# Compose a custom ScanflowEnvironment
from scanflow.deployer.env import ScanflowEnvironment
data_eng_env = ScanflowEnvironment()
data_eng_env.namespace=f"scanflow-{build_app.app_name}-{build_app.team_name}"
# TRACKER STORAGE MUST BE ALREADY DEPLOYED IN ITS OWN NAMESPACE (i.e: "scanflow-server")
# - "scanflow" db must already exist in postgresql
# - "scanflow" bucket must already exist in MinIO
#data_eng_env.tracker_config.TRACKER_STORAGE = f"postgresql://postgres:scanflow123@postgresql.scanflow-server/scanflow"
data_eng_env.tracker_config.TRACKER_STORAGE = env.get_env("SCANFLOW_TRACKER_STORAGE")
data_eng_env.tracker_config.TRACKER_ARTIFACT = f"s3://scanflow/{data_eng_env.namespace}"
# CLIENT CONFIG: REPLACE WITH CURRENTLY DEPLOYED SERVICES IN "scanflow-server" namespace
#data_eng_env.client_config.SCANFLOW_TRACKER_LOCAL_URI = f"http://scanflow-server-tracker-service.scanflow-server"
#data_eng_env.client_config.SCANFLOW_TRACKER_URI = f"http://scanflow-server-tracker-service.scanflow-server"
#data_eng_env.client_config.SCANFLOW_SERVER_URI = f"http://scanflow-server-tracker-service.scanflow-server"
data_eng_env.client_config.SCANFLOW_TRACKER_LOCAL_URI = env.get_env("SCANFLOW_SERVER_URI")
data_eng_env.client_config.SCANFLOW_TRACKER_URI = env.get_env("SCANFLOW_SERVER_URI")
data_eng_env.client_config.SCANFLOW_SERVER_URI = env.get_env("SCANFLOW_SERVER_URI")
# MINIO MUST BE ALREADY DEPLOYED IN ITS OWN NAMESPACE (i.e: "scanflow-server")
data_eng_env.secret.AWS_ACCESS_KEY_ID = env.get_env("AWS_ACCESS_KEY_ID")
data_eng_env.secret.AWS_SECRET_ACCESS_KEY = env.get_env("AWS_SECRET_ACCESS_KEY")
data_eng_env.secret.MLFLOW_S3_ENDPOINT_URL = env.get_env("MLFLOW_S3_ENDPOINT_URL")
data_eng_env.secret.AWS_ENDPOINT_URL = env.get_env("AWS_ENDPOINT_URL")
# NEW: configure image pull secret
data_eng_env.image_pull_secret.name = "cloudskin-registry"
data_eng_env.image_pull_secret.registry = env.get_env("DOCKER_REGISTRY")
data_eng_env.image_pull_secret.username = env.get_env("DOCKER_REGISTRY_USERNAME")
data_eng_env.image_pull_secret.password = env.get_env("DOCKER_REGISTRY_PASSWORD")
data_eng_env.image_pull_secret.email = "cloudskin-project@bsc.es"

In [None]:
# Create the application environment
if env.get_env("LOCAL_DEPLOY"):
    await deployer_client.create_environment(
        app=build_app,
        scanflowEnv=data_eng_env
    )

### Manual task: copy `scanflow` module
This step copies this repository's version of the `scanflow` module into the environment's PersistentVolumeClaim. The environment is created with asynchronous API calls, so we must ensure that both the `namespace` and the `persistentVolumeClaim` are already available before proceeding.

In [None]:
# Steps:
# - Local variables:
debug_pod_yaml = os.path.join(env.get_env("WORKDIR"), "tutorials", "cloudedge-reactive-migration", "debug_pod.yaml")
persistent_volume_claim = f"scanflow-{environment_namespace}"
scanflow_folder = os.path.join(env.get_env("WORKDIR"), "scanflow")
# Maximum time (in seconds) to wait for the PVC; same value the Kubernetes
# helper functions use as their default timeout
pvc_wait_timeout = 300

# - Check that the persistentVolumeClaim is properly Bound. The wait is
#   bounded so the cell fails with a clear error instead of hanging forever
#   when the environment was never created (e.g. LOCAL_DEPLOY is not set)
pvc_wait_start = time()
while not check_if_object_exists_and_ready(
    client=kube_client,
    object_type="persistentVolumeClaim",
    name=persistent_volume_claim,
    namespace=environment_namespace
):
    if time() - pvc_wait_start > pvc_wait_timeout:
        raise TimeoutError(
            f"PersistentVolumeClaim '{persistent_volume_claim}' in namespace "
            f"'{environment_namespace}' is not Bound after {pvc_wait_timeout} seconds."
        )
    # Wait 2 seconds for the next check
    sleep(2)

# - Deploy a Pod in the environment namespace that mounts the environment's persistentVolumeClaim.
#   For now we'll provide a YAML file with the expected name of the PVC, but in the future
#   this should be provided either by the ScanflowDeployClient or a Kubernetes API call
deploy_pod_and_wait(
    client=kube_client,
    yaml_file=debug_pod_yaml,
    namespace=environment_namespace
)

# - Once the pod is Running, proceed to compress the `scanflow` folder onto a tar file; then send it to the Pod
#   and uncompress it at the destination path.
#   The pod and container names are hard-coded here and must match the ones
#   declared in the debug pod YAML manifest
copy_local_folder_to_pod(
    client=kube_client,
    local_folder=scanflow_folder,
    namespace=environment_namespace,
    pod_name="cloudedge-debug-pod",
    container_name="busybox",
    target_path="/scanflow/scanflow/"
)

# - We can leave the Pod running for debugging purposes

# - We can leave the Pod running for debugging purposes

## Run Workflow to test
This composes an Argo CronWorkflow for the application and submits it to the Argo Workflows engine:
- Pre-requisites: Argo Workflows must be set to use the `default` service account when no `serviceAccount` is provided in the template

In [None]:
# Submit the built application through the deployer client; only runs when
# LOCAL_DEPLOY is set (the same condition that defines `deployer_client`)
if env.get_env("LOCAL_DEPLOY"):
    await deployer_client.run_app(app=build_app)
    # DEBUG - TODO: if using external config files, automate their copy inside the workflow PVC instead of doing it manually
    # - Copy Promcsv config file so it is available within the container in the /workflow/promql_queries.json path

## Clean-up

### Remove Scanflow application
This will delete the target Scanflow application:
- Remove its Argo Workflow object
  - Currently not working, as the Workflow or CronWorkflow names don't match the ones generated by `couler`
- Remove its PVC and related PV (created during Argo Workflow execution)

In [15]:
# if env.get_env("LOCAL_DEPLOY"):
#     await deployer_client.delete_app(app=build_app)

### Remove Scanflow environment

In [16]:
# if env.get_env("LOCAL_DEPLOY"):
#     await deployer_client.clean_environment(app=build_app, scanflow_env=data_eng_env)

## MLFlow debug cell

In [None]:
# if env.get_env("LOCAL_DEPLOY"):
#     import mlflow

#     client = ScanflowTrackerClient(scanflow_tracker_local_uri=env.get_env("SCANFLOW_TRACKER_URI"))
#     mlflow.set_tracking_uri(client.get_tracker_uri(True))
#     # Retrieve the Application experiment
    
#     reactive_experiment = mlflow.get_experiment_by_name(app_name)
#     experiment_id = reactive_experiment.experiment_id

#     # Retrieve filtered experiment runs by run_name, ordered by descending end time --> First entry will be the most recent
#     runs_df = mlflow.search_runs([experiment_id], filter_string=f"run_name='{team_name}'", order_by=["end_time DESC"])
#     run_id = runs_df.loc[[0]]['run_id'][0]
#     print(run_id)