In [None]:
#| default_exp airflow.aws_batch_executor

In [None]:
#| export

import tempfile
import shlex
import yaml
from pathlib import Path
from typing import *

from fastcore.script import call_parse, Param

from airt_service.sanitizer import sanitized_print
from airt.executor.subcommand import CLICommandBase, ClassCLICommand
from airt.helpers import slugify
from airt.logger import get_logger
from airt.patching import patch
from airt_service.airflow.base_executor import BaseAirflowExecutor, dag_template
from airt_service.airflow.utils import trigger_dag, wait_for_run_to_complete
from airt_service.aws.batch_utils import (
    _create_default_batch_environment_config,
    create_testing_batch_environment_ctx,
)
from airt_service.aws.utils import get_batch_environment_arns, get_queue_definition_arns
from airt_service.batch_job import get_environment_vars_for_batch_job
from airt_service.helpers import generate_random_string

22-10-20 06:56:34.090 [INFO] airt.executor.subcommand: Module loaded.


In [None]:
import os
import pytest
from datetime import timedelta
from time import sleep

from airt.executor.subcommand import SimpleCLICommand
from airt.testing import activate_by_import
from airt_service.airflow.utils import list_dag_runs
from airt_service.db.models import create_user_for_testing

[INFO] airt.testing.activate_by_import: Testing environment activated.
[INFO] airt.keras.helpers: Using a single GPU #0 with memory_limit 1024 MB


In [None]:
# Create a throwaway user on the "small" subscription tier and show its generated name
# (presumably consumed by test cells outside this view — confirm).
test_username = create_user_for_testing(subscription_type="small")
display(test_username)

'ykenjiugzn'

In [None]:
#| exporti

# Module-level logger, named after the module this cell ends up in.
logger = get_logger(__name__)

In [None]:
# Quick check that the logger emits at INFO level (this cell carries no export directive).
logger.info("Module loaded.")

[INFO] __main__: Module loaded.


In [None]:
#| exporti


def setup_test_paths(td: str) -> Tuple[str, str]:
    """Create ``data`` and ``model`` sub-directories and return them as ``local:`` URLs.

    Args:
        td: Directory under which the two sub-directories are created
    Returns:
        A ``(data_path_url, model_path_url)`` tuple, each of the form ``local:<path>``
    """
    root = Path(td)
    paths = [root / name for name in ("data", "model")]
    sanitized_print(f"{paths=}")

    # Make sure both directories exist; already-existing ones are left untouched
    for directory in paths:
        directory.mkdir(parents=True, exist_ok=True)

    # RemotePaths: data_path is "read-only", while model_path can be used for both reading and writing between calls
    data_url, model_url = (f"local:{directory}" for directory in paths)
    return data_url, model_url

In [None]:
# Exercise setup_test_paths against a scratch directory.
with tempfile.TemporaryDirectory() as d:
    data_path_url, model_path_url = setup_test_paths(d)

# The temporary directory is removed when the `with` block exits, so only the
# URL strings (not the directories themselves) are still meaningful here.
data_path_url, model_path_url

paths=[Path('/tmp/tmp3bk6g62c/data'), Path('/tmp/tmp3bk6g62c/model')]


('local:/tmp/tmp3bk6g62c/data', 'local:/tmp/tmp3bk6g62c/model')

In [None]:
#| export

# Name of the execution environment used for a step when none is given explicitly.
DEFAULT_EXEC_ENVIRONMENT = "preprocessing"

In [None]:
#| export


class AirflowAWSBatchExecutor(BaseAirflowExecutor):
    """Airflow executor that assigns every step to a named AWS Batch execution environment."""

    def __init__(
        self,
        steps: List[CLICommandBase],
        region: str,
        exec_environments: Optional[List[Optional[str]]] = None,
        batch_environment_arn_path: Optional[Union[str, Path]] = None,
    ):
        """Constructs a new AirflowAWSBatchExecutor instance

        Args:
            steps: List of instances of either ClassCLICommand or SimpleCLICommand
            region: Region to execute
            exec_environments: List of execution environments to execute steps, one entry
                per step. A ``None`` entry, or omitting the list altogether, selects
                ``DEFAULT_EXEC_ENVIRONMENT`` for the affected step(s).
            batch_environment_arn_path: Optional path forwarded to
                ``get_batch_environment_arns`` when looking up the available environments
        Raises:
            ValueError: If the number of execution environments differs from the number
                of steps, or if a given environment name is not among the available ones
        """
        self.region = region
        self.batch_environment_arn_path = batch_environment_arn_path

        if exec_environments is None:
            exec_environments = [DEFAULT_EXEC_ENVIRONMENT] * len(steps)

        if len(exec_environments) != len(steps):
            raise ValueError(
                f"len(exec_environments)={len(exec_environments)} != len(steps)={len(steps)}"
            )

        existing_exec_environments = list(
            get_batch_environment_arns(
                self.region, self.batch_environment_arn_path
            ).keys()
        )

        # Resolve every entry exactly once: None becomes the default (without being
        # checked against the available names), anything else must be a known name.
        self.exec_environments: List[str] = []
        for exec_env in exec_environments:
            if exec_env is None:
                self.exec_environments.append(DEFAULT_EXEC_ENVIRONMENT)
                continue
            if exec_env not in existing_exec_environments:
                raise ValueError(
                    f"Invalid value {exec_env} given for exec environment; Allowed values are {existing_exec_environments}"
                )
            self.exec_environments.append(exec_env)

        super(AirflowAWSBatchExecutor, self).__init__(steps)

    def execute(
        self,
        *,
        description: str,
        tags: Union[str, List[str]],
        on_step_start: Optional[CLICommandBase] = None,
        on_step_end: Optional[CLICommandBase] = None,
        **kwargs,
    ) -> Tuple[Path, str]:
        """Create DAG and execute steps in airflow

        Args:
            description: description of DAG
            tags: tags for DAG
            on_step_start: CLI to call before executing step/task in DAG
            on_step_end: CLI to call after executing step/task in DAG
            kwargs: keyword arguments needed for steps/tasks
        Returns:
            A tuple which contains dag file path and run id
        Raises:
            NotImplementedError: Always, in this in-class placeholder
        """
        raise NotImplementedError("Need to implement")

In [None]:
def save_test_batch_environment_arns(folder: Union[str, Path]) -> Path:
    """Write a placeholder batch environment ARN file into ``folder`` for tests.

    The file has a single ``eu-west-1`` region containing four tasks, each with
    placeholder compute environment, job definition and job queue ARNs.

    Args:
        folder: Existing directory (string or ``Path``) in which to create the file
    Returns:
        Path of the written ``batch_environment.yml`` file
    """
    arn_keys = ["compute_environment_arn", "job_definition_arn", "job_queue_arn"]
    task_names = ["csv_processing", "predictions", "preprocessing", "training"]

    test_batch_environment_arns = {
        "eu-west-1": {
            task: {arn_key: "arn:aws:batch:placeholder" for arn_key in arn_keys}
            for task in task_names
        }
    }

    # Callers pass the plain string yielded by TemporaryDirectory, so normalise first
    test_batch_environment_arn_path = Path(folder) / "batch_environment.yml"
    with open(test_batch_environment_arn_path, "w") as f:
        yaml.dump(test_batch_environment_arns, f, default_flow_style=False)

    return test_batch_environment_arn_path

In [None]:
# Two steps that differ only in which method of the test executor they invoke
steps = [
    ClassCLICommand(
        executor_name="test-executor", class_name="MyTestExecutor", f_name=method_name
    )
    for method_name in ["f", "g"]
]

In [None]:
region = "eu-west-1"
with tempfile.TemporaryDirectory() as d:
    data_path_url, model_path_url = setup_test_paths(d)
    test_batch_environment_arn_path = save_test_batch_environment_arns(d)

    # Without explicit environments every step gets the default one
    abe = AirflowAWSBatchExecutor(
        steps=steps,
        region=region,
        batch_environment_arn_path=test_batch_environment_arn_path,
    )
    display(abe.exec_environments)
    assert abe.exec_environments == ["preprocessing"] * len(steps)

    # Rejected inputs: first a length mismatch, then unknown environment names
    for invalid_exec_environments in (["preprocessing"], ["gibberish", "gibberish"]):
        with pytest.raises(ValueError) as e:
            AirflowAWSBatchExecutor(
                steps=steps,
                region=region,
                exec_environments=invalid_exec_environments,
                batch_environment_arn_path=test_batch_environment_arn_path,
            )
        display(e)

paths=[Path('/tmp/tmpe6nohwug/data'), Path('/tmp/tmpe6nohwug/model')]


['preprocessing', 'preprocessing']

<ExceptionInfo ValueError('len(exec_environments)=1 != len(steps)2') tblen=2>

<ExceptionInfo ValueError("Invalid value gibberish given for exec environment; Allowed values are ['csv_processing', 'predictions', 'preprocessing', 'training']") tblen=2>

In [None]:
#| export


@patch
def _create_step_template(
    self: AirflowAWSBatchExecutor, step: CLICommandBase, exec_environment: str, **kwargs
):
    """
    Render one step as the source text of a ``BatchOperator`` task

    Args:
        step: step to render
        exec_environment: name of the execution environment whose job queue and
            job definition ARNs the task should use
        kwargs: keyword arguments forwarded to ``step.to_cli``
    Returns:
        String containing a ``BatchOperator(...)`` constructor call for the step
    """
    command_line = step.to_cli(**kwargs)
    # The slugified command line serves as both the task id and the job name
    slug = slugify(command_line)

    env_entries = []
    for var_name, var_value in get_environment_vars_for_batch_job().items():
        env_entries.append({"name": var_name, "value": var_value})

    container_overrides = {
        "command": shlex.split(command_line),
        "environment": env_entries,
    }
    # Braces are doubled so the dict literal survives a later format pass
    # (presumably str.format on the surrounding DAG template — confirm).
    overrides = repr(container_overrides).replace("{", "{{").replace("}", "}}")

    job_queue_arn, job_definition_arn = get_queue_definition_arns(
        task=exec_environment,
        region=self.region,
        batch_environment_arn_path=self.batch_environment_arn_path,
    )

    return f"""BatchOperator(task_id='{slug}', job_definition="{job_definition_arn}", job_queue="{job_queue_arn}", job_name="{slug}", overrides={overrides})"""

In [None]:
region = "eu-west-1"
with tempfile.TemporaryDirectory() as d:
    data_path_url, model_path_url = setup_test_paths(d)

    test_batch_environment_arn_path = save_test_batch_environment_arns(d)
    abe = AirflowAWSBatchExecutor(
        steps=steps,
        region=region,
        batch_environment_arn_path=test_batch_environment_arn_path,
    )
    actual = abe._create_step_template(
        steps[0],
        exec_environment="training",
        data_path_url=data_path_url,
        model_path_url=model_path_url,
    )
    # The rendered template embeds the batch job's environment variables (cloud
    # credentials included), so print it through the sanitizer instead of
    # display() to keep those values out of the saved notebook output.
    # NOTE(review): assumes sanitized_print masks secrets as its name implies.
    sanitized_print(actual)

paths=[Path('/tmp/tmpb2olcuby/data'), Path('/tmp/tmpb2olcuby/model')]


'BatchOperator(task_id=\'test-executor-my_test_executor-f-data-path-urllocaltmptmpb2olcubydata-model-path-urllocaltmptmpb2olcubymodel\', job_definition="arn:aws:batch:placeholder", job_queue="arn:aws:batch:placeholder", job_name="test-executor-my_test_executor-f-data-path-urllocaltmptmpb2olcubydata-model-path-urllocaltmptmpb2olcubymodel", overrides={{\'command\': [\'test-executor\', \'my_test_executor\', \'f\', \'--data-path-url=local:/tmp/tmpb2olcuby/data\', \'--model-path-url=local:/tmp/tmpb2olcuby/model\'], \'environment\': [{{\'name\': \'AWS_ACCESS_KEY_ID\', \'value\': \'********************\'}}, {{\'name\': \'AWS_SECRET_ACCESS_KEY\', \'value\': \'****************************************\'}}, {{\'name\': \'AWS_DEFAULT_REGION\', \'value\': \'eu-west-1\'}}, {{\'name\': \'AZURE_SUBSCRIPTION_ID\', \'value': '********-****-****-****-************'}}, {{\'name\': \'AZURE_TENANT_ID\', \'value': '********-****-****-****-************'}}, {{\'name\': \'AZURE_CLIENT_ID\', \'value': '********-*

In [None]:
#| export


@patch
def _create_dag_template(
    self: AirflowAWSBatchExecutor,
    on_step_start: Optional[CLICommandBase] = None,
    on_step_end: Optional[CLICommandBase] = None,
    **kwargs,
) -> str:
    """
    Build the DAG source: the base template, one task per step and a linear dependency chain

    Args:
        on_step_start: CLI to call before executing step/task in DAG
        on_step_end: CLI to call after executing step/task in DAG
        kwargs: keyword arguments to pass to steps' CLI
    Returns:
        Generated DAG with steps as tasks
    """
    # Every generated line sits one indentation level deep, on its own line
    line_prefix = "\n" + " " * 4

    # Rendered operator expressions, in the order they should run
    task_exprs: List[str] = []
    for step_index, step in enumerate(self.steps):
        exec_env = self.exec_environments[step_index]
        step_count = step_index + 1  # hooks receive a 1-based step counter

        if on_step_start is not None:
            task_exprs.append(
                self._create_step_template(on_step_start, exec_env, step_count=step_count, **kwargs)  # type: ignore
            )

        task_exprs.append(self._create_step_template(step, exec_env, **kwargs))  # type: ignore

        if on_step_end is not None:
            task_exprs.append(
                self._create_step_template(on_step_end, exec_env, step_count=step_count, **kwargs)  # type: ignore
            )

    # t1 = <operator>, t2 = <operator>, ...
    assignments = "".join(
        f"{line_prefix}t{task_no} = {task_expr}"
        for task_no, task_expr in enumerate(task_exprs, start=1)
    )
    # t1 >> t2 >> ... runs the tasks strictly one after another
    dependency_chain = line_prefix + " >> ".join(
        f"t{task_no}" for task_no in range(1, len(task_exprs) + 1)
    )

    return dag_template + assignments + dependency_chain

In [None]:
region = "eu-west-1"
with tempfile.TemporaryDirectory() as d:
    data_path_url, model_path_url = setup_test_paths(d)
    test_batch_environment_arn_path = save_test_batch_environment_arns(d)

    # Arguments handed to every step's CLI
    kwargs = dict(data_path_url=data_path_url, model_path_url=model_path_url)

    abe = AirflowAWSBatchExecutor(
        steps=steps,
        region=region,
        batch_environment_arn_path=test_batch_environment_arn_path,
    )

    # Hooks rendered before/after each step; {step_count} is filled in per step
    on_step_start = SimpleCLICommand(command="sleep {step_count}")
    on_step_end = SimpleCLICommand(command="echo step {step_count} completed")

    rendered_dag_template = abe._create_dag_template(
        on_step_start=on_step_start, on_step_end=on_step_end, **kwargs
    )
    sanitized_print(rendered_dag_template)

paths=[Path('/tmp/tmpo7ej_l42/data'), Path('/tmp/tmpo7ej_l42/model')]
import datetime
from textwrap import dedent

# The DAG object; we'll need this to instantiate a DAG
from airflow import DAG

# Operators; we need this to operate!
from airflow.providers.amazon.aws.operators.batch import BatchOperator
import azure.batch.models as batchmodels
from airflow.providers.microsoft.azure.operators.batch import AzureBatchOperator
from airflow.operators.bash import BashOperator
from airflow.operators.trigger_dagrun import TriggerDagRunOperator
with DAG(
    '{dag_name}',
    # These args will get passed on to each operator
    # You can override them on a per-task basis during operator initialization
    default_args={{
        'schedule_interval': {schedule_interval},
        'depends_on_past': False,
        'email': ['info@airt.ai'],
        'email_on_failure': False,
        'email_on_retry': False,
        'retries': 1,
        'retry_delay': datetime.timedelta(minutes=5),
        # 'queue

In [None]:
# | eval: false
# Test case for AirflowAWSBatchExecutor._create_dag
region = "eu-west-1"
with tempfile.TemporaryDirectory() as d:
    data_path_url, model_path_url = setup_test_paths(d)
    steps = [
        ClassCLICommand(
            executor_name="test-executor", class_name="MyTestExecutor", f_name="f"
        ),
        ClassCLICommand(
            executor_name="test-executor", class_name="MyTestExecutor", f_name="g"
        ),
    ]
    # The None entry exercises the fallback to the default execution environment
    exec_environments = ["training", None]
    on_step_start = SimpleCLICommand(command="sleep {step_count}")
    on_step_end = SimpleCLICommand(command="echo step {step_count} completed")

    td = Path(d)
    env_config_path = td / "env_config.yaml"
    created_env_info_path = td / "output_file.yaml"
    # Random suffix in the prefix (presumably to avoid name clashes between runs — confirm)
    _create_default_batch_environment_config(
        prefix=f"airflow_batch_create_dag_testing_{generate_random_string()}",
        output_path=env_config_path,
        regions=[region],
    )

    with open(env_config_path) as f:
        env_config = yaml.safe_load(f)
    display(f"{env_config=}")
    with create_testing_batch_environment_ctx(
        input_yaml_path=env_config_path, output_yaml_path=created_env_info_path
    ):
        abe = AirflowAWSBatchExecutor(
            steps=steps,
            region=region,
            exec_environments=exec_environments,
            batch_environment_arn_path=created_env_info_path,
        )
        dag_id, dag_file_path = abe._create_dag(
            data_path_url=data_path_url,
            model_path_url=model_path_url,
            #         schedule_interval="@weekly",
            schedule_interval=None,
            description="test description",
            tags="test_tag",
            on_step_start=on_step_start,
            on_step_end=on_step_end,
        )

        try:
            display(f"{dag_file_path=}")
            dag_id = str(dag_file_path).split("/")[-1].split(".py")[0]

            # Pause before querying Airflow (presumably so it can pick up the new DAG file — confirm)
            sleep(15)

            dag_runs = list_dag_runs(dag_id=dag_id)
            display(f"{dag_runs=}")

            run_id = trigger_dag(dag_id=dag_id, conf={})

            #     run_id = dag_runs[0]["run_id"]
            display(run_id)
            state = wait_for_run_to_complete(dag_id=dag_id, run_id=run_id, timeout=3600)
            display(state)
        finally:
            # Remove the generated DAG file even when triggering or waiting fails,
            # so a broken run does not leave it behind in the DAG folder.
            dag_file_path.unlink()

paths=[Path('/tmp/tmpl3ltl4p3/data'), Path('/tmp/tmpl3ltl4p3/model')]


"env_config={'eu-west-1': {'csv_processing': {'compute_environment': {'instance_type': 'r5.16xlarge', 'max_instances': 10, 'min_instances': 0, 'name': 'airflow_batch_create_dag_testing_Y8DBBW_csv_processing_compute_environment'}, 'job_definition': {'image': 'registry.gitlab.com/airt.ai/airt-service:dev', 'name': 'airflow_batch_create_dag_testing_Y8DBBW_csv_processing_job_definition'}, 'job_queue': {'name': 'airflow_batch_create_dag_testing_Y8DBBW_csv_processing_job_queue', 'priority': 100}}, 'predictions': {'compute_environment': {'instance_type': 'g4dn.xlarge', 'max_instances': 10, 'min_instances': 0, 'name': 'airflow_batch_create_dag_testing_Y8DBBW_predictions_compute_environment'}, 'job_definition': {'image': 'registry.gitlab.com/airt.ai/airt-service:dev', 'name': 'airflow_batch_create_dag_testing_Y8DBBW_predictions_job_definition'}, 'job_queue': {'name': 'airflow_batch_create_dag_testing_Y8DBBW_predictions_job_queue', 'priority': 100}}, 'preprocessing': {'compute_environment': {'in

task_name='csv_processing'
[INFO] botocore.credentials: Found credentials in environment variables.
[INFO] airt_service.aws.batch_utils: wait(): self.arn='arn:aws:batch:eu-west-1:617504802562:compute-environment/airflow_batch_create_dag_testing_Y8DBBW_csv_processing_compute_environment', status=CREATING, state=ENABLED
[INFO] airt_service.aws.batch_utils: wait(): self.arn='arn:aws:batch:eu-west-1:617504802562:compute-environment/airflow_batch_create_dag_testing_Y8DBBW_csv_processing_compute_environment', status=CREATING, state=ENABLED
[INFO] airt_service.aws.batch_utils: wait(): self.arn='arn:aws:batch:eu-west-1:617504802562:compute-environment/airflow_batch_create_dag_testing_Y8DBBW_csv_processing_compute_environment', status=VALID, state=ENABLED
compute_env.arn='arn:aws:batch:eu-west-1:617504802562:compute-environment/airflow_batch_create_dag_testing_Y8DBBW_csv_processing_compute_environment'
[INFO] airt_service.aws.batch_utils: wait(): self.arn='arn:aws:batch:eu-west-1:617504802562:j

"dag_file_path=Path('/root/airflow/dags/test-executor-my_test_executor-f-data-path-urllocaltmptmpl3ltl4p3data-model-path-urllocaltmptmpl3ltl4p3model_test-executor-my_test_executor-g-data-path-urllocaltmptmpl3ltl4p3data-model-path-urllocaltmptmpl3ltl4p3model.py')"

'dag_runs=[]'

[{'dag_id': 'test-executor-my_test_executor-f-data-path-urllocaltmptmpl3ltl4p3data-model-path-urllocaltmptmpl3ltl4p3model_test-executor-my_test_executor-g-data-path-urllocaltmptmpl3ltl4p3data-model-path-urllocaltmptmpl3ltl4p3model', 'run_id': 'airt-service__2022-10-20T06:57:44.498750', 'state': 'running', 'execution_date': '2022-10-20T06:57:45+00:00', 'start_date': '2022-10-20T06:57:46.328910+00:00', 'end_date': ''}]


'airt-service__2022-10-20T06:57:44.498750'

'success'

deleting job definition - csv_processing
deleting job queue - csv_processing
[INFO] airt_service.aws.batch_utils: wait(): self.arn='arn:aws:batch:eu-west-1:617504802562:job-queue/airflow_batch_create_dag_testing_Y8DBBW_csv_processing_job_queue', status=UPDATING, state=DISABLED
[INFO] airt_service.aws.batch_utils: wait(): self.arn='arn:aws:batch:eu-west-1:617504802562:job-queue/airflow_batch_create_dag_testing_Y8DBBW_csv_processing_job_queue', status=VALID, state=DISABLED
[INFO] airt_service.aws.batch_utils: wait(): self.arn='arn:aws:batch:eu-west-1:617504802562:job-queue/airflow_batch_create_dag_testing_Y8DBBW_csv_processing_job_queue', status=DELETING, state=DISABLED
[INFO] airt_service.aws.batch_utils: wait(): self.arn='arn:aws:batch:eu-west-1:617504802562:job-queue/airflow_batch_create_dag_testing_Y8DBBW_csv_processing_job_queue', status=DELETING, state=DISABLED
[INFO] airt_service.aws.batch_utils: wait(): self.arn='arn:aws:batch:eu-west-1:617504802562:job-queue/airflow_batch_create

[INFO] airt_service.aws.batch_utils: wait(): self.arn='arn:aws:batch:eu-west-1:617504802562:job-queue/airflow_batch_create_dag_testing_Y8DBBW_csv_processing_job_queue', status=DELETING, state=DISABLED
[INFO] airt_service.aws.batch_utils: wait(): self.arn='arn:aws:batch:eu-west-1:617504802562:job-queue/airflow_batch_create_dag_testing_Y8DBBW_csv_processing_job_queue', status=DELETING, state=DISABLED
[INFO] airt_service.aws.batch_utils: wait(): self.arn='arn:aws:batch:eu-west-1:617504802562:job-queue/airflow_batch_create_dag_testing_Y8DBBW_csv_processing_job_queue', status=DELETING, state=DISABLED
[INFO] airt_service.aws.batch_utils: wait(): self.arn='arn:aws:batch:eu-west-1:617504802562:job-queue/airflow_batch_create_dag_testing_Y8DBBW_csv_processing_job_queue', status=DELETING, state=DISABLED
[INFO] airt_service.aws.batch_utils: wait(): self.arn='arn:aws:batch:eu-west-1:617504802562:job-queue/airflow_batch_create_dag_testing_Y8DBBW_csv_processing_job_queue', status=DELETING, state=DISA

[INFO] airt_service.aws.batch_utils: wait(): self.arn='arn:aws:batch:eu-west-1:617504802562:job-queue/airflow_batch_create_dag_testing_Y8DBBW_predictions_job_queue', status=DELETING, state=DISABLED
[INFO] airt_service.aws.batch_utils: wait(): self.arn='arn:aws:batch:eu-west-1:617504802562:job-queue/airflow_batch_create_dag_testing_Y8DBBW_predictions_job_queue', status=DELETING, state=DISABLED
[INFO] airt_service.aws.batch_utils: wait(): self.arn='arn:aws:batch:eu-west-1:617504802562:job-queue/airflow_batch_create_dag_testing_Y8DBBW_predictions_job_queue', status=DELETING, state=DISABLED
[INFO] airt_service.aws.batch_utils: wait(): self.arn='arn:aws:batch:eu-west-1:617504802562:job-queue/airflow_batch_create_dag_testing_Y8DBBW_predictions_job_queue', status=DELETING, state=DISABLED
[INFO] airt_service.aws.batch_utils: wait(): self.arn='arn:aws:batch:eu-west-1:617504802562:job-queue/airflow_batch_create_dag_testing_Y8DBBW_predictions_job_queue', status=DELETING, state=DISABLED
[INFO] air

[INFO] airt_service.aws.batch_utils: wait(): self.arn='arn:aws:batch:eu-west-1:617504802562:job-queue/airflow_batch_create_dag_testing_Y8DBBW_preprocessing_job_queue', status=DELETING, state=DISABLED
[INFO] airt_service.aws.batch_utils: wait(): self.arn='arn:aws:batch:eu-west-1:617504802562:job-queue/airflow_batch_create_dag_testing_Y8DBBW_preprocessing_job_queue', status=DELETING, state=DISABLED
[INFO] airt_service.aws.batch_utils: wait(): self.arn='arn:aws:batch:eu-west-1:617504802562:job-queue/airflow_batch_create_dag_testing_Y8DBBW_preprocessing_job_queue', status=DELETING, state=DISABLED
[INFO] airt_service.aws.batch_utils: wait(): self.arn='arn:aws:batch:eu-west-1:617504802562:job-queue/airflow_batch_create_dag_testing_Y8DBBW_preprocessing_job_queue', status=DELETING, state=DISABLED
[INFO] airt_service.aws.batch_utils: wait(): self.arn='arn:aws:batch:eu-west-1:617504802562:job-queue/airflow_batch_create_dag_testing_Y8DBBW_preprocessing_job_queue', status=DELETING, state=DISABLED


[INFO] airt_service.aws.batch_utils: wait(): self.arn='arn:aws:batch:eu-west-1:617504802562:compute-environment/airflow_batch_create_dag_testing_Y8DBBW_preprocessing_compute_environment', status=UPDATING, state=DISABLED
[INFO] airt_service.aws.batch_utils: wait(): self.arn='arn:aws:batch:eu-west-1:617504802562:compute-environment/airflow_batch_create_dag_testing_Y8DBBW_preprocessing_compute_environment', status=UPDATING, state=DISABLED
[INFO] airt_service.aws.batch_utils: wait(): self.arn='arn:aws:batch:eu-west-1:617504802562:compute-environment/airflow_batch_create_dag_testing_Y8DBBW_preprocessing_compute_environment', status=UPDATING, state=DISABLED
[INFO] airt_service.aws.batch_utils: wait(): self.arn='arn:aws:batch:eu-west-1:617504802562:compute-environment/airflow_batch_create_dag_testing_Y8DBBW_preprocessing_compute_environment', status=UPDATING, state=DISABLED
[INFO] airt_service.aws.batch_utils: wait(): self.arn='arn:aws:batch:eu-west-1:617504802562:compute-environment/airflow_

[INFO] airt_service.aws.batch_utils: wait(): self.arn='arn:aws:batch:eu-west-1:617504802562:job-queue/airflow_batch_create_dag_testing_Y8DBBW_training_job_queue', status=DELETING, state=DISABLED
[INFO] airt_service.aws.batch_utils: wait(): self.arn='arn:aws:batch:eu-west-1:617504802562:job-queue/airflow_batch_create_dag_testing_Y8DBBW_training_job_queue', status=DELETING, state=DISABLED
[INFO] airt_service.aws.batch_utils: wait(): self.arn='arn:aws:batch:eu-west-1:617504802562:job-queue/airflow_batch_create_dag_testing_Y8DBBW_training_job_queue', status=DELETING, state=DISABLED
[INFO] airt_service.aws.batch_utils: wait(): self.arn='arn:aws:batch:eu-west-1:617504802562:job-queue/airflow_batch_create_dag_testing_Y8DBBW_training_job_queue', status=DELETING, state=DISABLED
[INFO] airt_service.aws.batch_utils: wait(): self.arn='arn:aws:batch:eu-west-1:617504802562:job-queue/airflow_batch_create_dag_testing_Y8DBBW_training_job_queue', status=DELETING, state=DISABLED
[INFO] airt_service.aws.b

In [None]:
# | eval: false
# Test case for AirflowAWSBatchExecutor.schedule
region = "eu-west-1"
with tempfile.TemporaryDirectory() as d:
    data_path_url, model_path_url = setup_test_paths(d)
    steps = [
        ClassCLICommand(
            executor_name="test-executor", class_name="MyTestExecutor", f_name="f"
        ),
        ClassCLICommand(
            executor_name="test-executor", class_name="MyTestExecutor", f_name="g"
        ),
    ]
    exec_environments = ["csv_processing", "preprocessing"]
    on_step_start = SimpleCLICommand(command="sleep {step_count}")
    on_step_end = SimpleCLICommand(command="echo step {step_count} completed")

    td = Path(d)
    env_config_path = td / "env_config.yaml"
    created_env_info_path = td / "output_file.yaml"
    # Random suffix in the prefix (presumably to avoid name clashes between runs — confirm)
    _create_default_batch_environment_config(
        prefix=f"airflow_batch_schedule_testing_{generate_random_string()}",
        output_path=env_config_path,
        regions=[region],
    )

    with open(env_config_path) as f:
        env_config = yaml.safe_load(f)
    display(f"{env_config=}")
    with create_testing_batch_environment_ctx(
        input_yaml_path=env_config_path, output_yaml_path=created_env_info_path
    ):
        abe = AirflowAWSBatchExecutor(
            steps=steps,
            region=region,
            exec_environments=exec_environments,
            batch_environment_arn_path=created_env_info_path,
        )
        dag_file_path = abe.schedule(
            data_path_url=data_path_url,
            model_path_url=model_path_url,
            #         schedule_interval="@weekly",
            schedule_interval=timedelta(days=7),
            description="test description",
            tags="test_tag",
            on_step_start=on_step_start,
            on_step_end=on_step_end,
        )

        try:
            display(f"{dag_file_path=}")
            # The DAG id is taken from the DAG file's name, minus the ".py" suffix
            dag_id = str(dag_file_path).split("/")[-1].split(".py")[0]

            # Pause before querying Airflow (presumably so it can pick up the new DAG file — confirm)
            sleep(15)

            dag_runs = list_dag_runs(dag_id=dag_id)
            display(f"{dag_runs=}")

            run_id = trigger_dag(dag_id=dag_id, conf={})

            #     run_id = dag_runs[0]["run_id"]
            display(run_id)
            state = wait_for_run_to_complete(dag_id=dag_id, run_id=run_id, timeout=3600)
            display(state)
        finally:
            # Remove the generated DAG file even when triggering or waiting fails,
            # so a broken run does not leave a weekly-scheduled DAG behind.
            dag_file_path.unlink()

paths=[Path('/tmp/tmpfrqhoov3/data'), Path('/tmp/tmpfrqhoov3/model')]


"env_config={'eu-west-1': {'csv_processing': {'compute_environment': {'instance_type': 'r5.16xlarge', 'max_instances': 10, 'min_instances': 0, 'name': 'airflow_batch_schedule_testing_AZQI41_csv_processing_compute_environment'}, 'job_definition': {'image': 'registry.gitlab.com/airt.ai/airt-service:dev', 'name': 'airflow_batch_schedule_testing_AZQI41_csv_processing_job_definition'}, 'job_queue': {'name': 'airflow_batch_schedule_testing_AZQI41_csv_processing_job_queue', 'priority': 100}}, 'predictions': {'compute_environment': {'instance_type': 'g4dn.xlarge', 'max_instances': 10, 'min_instances': 0, 'name': 'airflow_batch_schedule_testing_AZQI41_predictions_compute_environment'}, 'job_definition': {'image': 'registry.gitlab.com/airt.ai/airt-service:dev', 'name': 'airflow_batch_schedule_testing_AZQI41_predictions_job_definition'}, 'job_queue': {'name': 'airflow_batch_schedule_testing_AZQI41_predictions_job_queue', 'priority': 100}}, 'preprocessing': {'compute_environment': {'instance_type'

task_name='csv_processing'
[INFO] airt_service.aws.batch_utils: wait(): self.arn='arn:aws:batch:eu-west-1:617504802562:compute-environment/airflow_batch_schedule_testing_AZQI41_csv_processing_compute_environment', status=CREATING, state=ENABLED
[INFO] airt_service.aws.batch_utils: wait(): self.arn='arn:aws:batch:eu-west-1:617504802562:compute-environment/airflow_batch_schedule_testing_AZQI41_csv_processing_compute_environment', status=VALID, state=ENABLED
compute_env.arn='arn:aws:batch:eu-west-1:617504802562:compute-environment/airflow_batch_schedule_testing_AZQI41_csv_processing_compute_environment'
[INFO] airt_service.aws.batch_utils: wait(): self.arn='arn:aws:batch:eu-west-1:617504802562:job-queue/airflow_batch_schedule_testing_AZQI41_csv_processing_job_queue', status=CREATING, state=ENABLED
[INFO] airt_service.aws.batch_utils: wait(): self.arn='arn:aws:batch:eu-west-1:617504802562:job-queue/airflow_batch_schedule_testing_AZQI41_csv_processing_job_queue', status=VALID, state=ENABLED

"dag_file_path=Path('/root/airflow/dags/test-executor-my_test_executor-f-data-path-urllocaltmptmpfrqhoov3data-model-path-urllocaltmptmpfrqhoov3model_test-executor-my_test_executor-g-data-path-urllocaltmptmpfrqhoov3data-model-path-urllocaltmptmpfrqhoov3model.py')"

'dag_runs=[]'

[{'dag_id': 'test-executor-my_test_executor-f-data-path-urllocaltmptmpfrqhoov3data-model-path-urllocaltmptmpfrqhoov3model_test-executor-my_test_executor-g-data-path-urllocaltmptmpfrqhoov3data-model-path-urllocaltmptmpfrqhoov3model', 'run_id': 'airt-service__2022-10-20T07:20:44.543539', 'state': 'running', 'execution_date': '2022-10-20T07:20:45+00:00', 'start_date': '2022-10-20T07:20:46.540329+00:00', 'end_date': ''}]


'airt-service__2022-10-20T07:20:44.543539'

'success'

deleting job definition - csv_processing
deleting job queue - csv_processing
[INFO] airt_service.aws.batch_utils: wait(): self.arn='arn:aws:batch:eu-west-1:617504802562:job-queue/airflow_batch_schedule_testing_AZQI41_csv_processing_job_queue', status=UPDATING, state=DISABLED
[INFO] airt_service.aws.batch_utils: wait(): self.arn='arn:aws:batch:eu-west-1:617504802562:job-queue/airflow_batch_schedule_testing_AZQI41_csv_processing_job_queue', status=VALID, state=DISABLED
[INFO] airt_service.aws.batch_utils: wait(): self.arn='arn:aws:batch:eu-west-1:617504802562:job-queue/airflow_batch_schedule_testing_AZQI41_csv_processing_job_queue', status=DELETING, state=DISABLED
[INFO] airt_service.aws.batch_utils: wait(): self.arn='arn:aws:batch:eu-west-1:617504802562:job-queue/airflow_batch_schedule_testing_AZQI41_csv_processing_job_queue', status=DELETING, state=DISABLED
[INFO] airt_service.aws.batch_utils: wait(): self.arn='arn:aws:batch:eu-west-1:617504802562:job-queue/airflow_batch_schedule_testi

[INFO] airt_service.aws.batch_utils: wait(): self.arn='arn:aws:batch:eu-west-1:617504802562:job-queue/airflow_batch_schedule_testing_AZQI41_csv_processing_job_queue', status=DELETING, state=DISABLED
[INFO] airt_service.aws.batch_utils: wait(): self.arn='arn:aws:batch:eu-west-1:617504802562:job-queue/airflow_batch_schedule_testing_AZQI41_csv_processing_job_queue', status=DELETING, state=DISABLED
[INFO] airt_service.aws.batch_utils: wait(): self.arn='arn:aws:batch:eu-west-1:617504802562:job-queue/airflow_batch_schedule_testing_AZQI41_csv_processing_job_queue', status=DELETING, state=DISABLED
[INFO] airt_service.aws.batch_utils: wait(): self.arn='arn:aws:batch:eu-west-1:617504802562:job-queue/airflow_batch_schedule_testing_AZQI41_csv_processing_job_queue', status=DELETING, state=DISABLED
[INFO] airt_service.aws.batch_utils: wait(): self.arn='arn:aws:batch:eu-west-1:617504802562:job-queue/airflow_batch_schedule_testing_AZQI41_csv_processing_job_queue', status=DELETING, state=DISABLED
[INFO

[INFO] airt_service.aws.batch_utils: wait(): self.arn='arn:aws:batch:eu-west-1:617504802562:job-queue/airflow_batch_schedule_testing_AZQI41_predictions_job_queue', status=DELETING, state=DISABLED
[INFO] airt_service.aws.batch_utils: wait(): self.arn='arn:aws:batch:eu-west-1:617504802562:job-queue/airflow_batch_schedule_testing_AZQI41_predictions_job_queue', status=DELETING, state=DISABLED
[INFO] airt_service.aws.batch_utils: wait(): self.arn='arn:aws:batch:eu-west-1:617504802562:job-queue/airflow_batch_schedule_testing_AZQI41_predictions_job_queue', status=DELETING, state=DISABLED
[INFO] airt_service.aws.batch_utils: wait(): self.arn='arn:aws:batch:eu-west-1:617504802562:job-queue/airflow_batch_schedule_testing_AZQI41_predictions_job_queue', status=DELETING, state=DISABLED
[INFO] airt_service.aws.batch_utils: wait(): self.arn='arn:aws:batch:eu-west-1:617504802562:job-queue/airflow_batch_schedule_testing_AZQI41_predictions_job_queue', status=DELETING, state=DISABLED
[INFO] airt_service.

[INFO] airt_service.aws.batch_utils: wait(): self.arn='arn:aws:batch:eu-west-1:617504802562:job-queue/airflow_batch_schedule_testing_AZQI41_preprocessing_job_queue', status=DELETING, state=DISABLED
[INFO] airt_service.aws.batch_utils: wait(): self.arn='arn:aws:batch:eu-west-1:617504802562:job-queue/airflow_batch_schedule_testing_AZQI41_preprocessing_job_queue', status=DELETING, state=DISABLED
[INFO] airt_service.aws.batch_utils: wait(): self.arn='arn:aws:batch:eu-west-1:617504802562:job-queue/airflow_batch_schedule_testing_AZQI41_preprocessing_job_queue', status=DELETING, state=DISABLED
[INFO] airt_service.aws.batch_utils: wait(): self.arn='arn:aws:batch:eu-west-1:617504802562:job-queue/airflow_batch_schedule_testing_AZQI41_preprocessing_job_queue', status=DELETING, state=DISABLED
[INFO] airt_service.aws.batch_utils: wait(): self.arn='arn:aws:batch:eu-west-1:617504802562:job-queue/airflow_batch_schedule_testing_AZQI41_preprocessing_job_queue', status=DELETING, state=DISABLED
[INFO] air

[INFO] airt_service.aws.batch_utils: wait(): self.arn='arn:aws:batch:eu-west-1:617504802562:job-queue/airflow_batch_schedule_testing_AZQI41_training_job_queue', status=DELETING, state=DISABLED
[INFO] airt_service.aws.batch_utils: wait(): self.arn='arn:aws:batch:eu-west-1:617504802562:job-queue/airflow_batch_schedule_testing_AZQI41_training_job_queue', status=DELETING, state=DISABLED
[INFO] airt_service.aws.batch_utils: wait(): self.arn='arn:aws:batch:eu-west-1:617504802562:job-queue/airflow_batch_schedule_testing_AZQI41_training_job_queue', status=DELETING, state=DISABLED
[INFO] airt_service.aws.batch_utils: wait(): self.arn='arn:aws:batch:eu-west-1:617504802562:job-queue/airflow_batch_schedule_testing_AZQI41_training_job_queue', status=DELETING, state=DISABLED
[INFO] airt_service.aws.batch_utils: wait(): self.arn='arn:aws:batch:eu-west-1:617504802562:job-queue/airflow_batch_schedule_testing_AZQI41_training_job_queue', status=DELETING, state=DISABLED
[INFO] airt_service.aws.batch_utils

In [None]:
#| export


@patch
def execute(
    self: AirflowAWSBatchExecutor,
    *,
    description: str,
    tags: Union[str, List[str]],
    on_step_start: Optional[CLICommandBase] = None,
    on_step_end: Optional[CLICommandBase] = None,
    **kwargs
) -> Tuple[Path, str]:
    """Generate the DAG for this executor and trigger one run of it in airflow

    Args:
        description: description of DAG
        tags: tags for DAG
        on_step_start: CLI to call before executing step/task in DAG
        on_step_end: CLI to call after executing step/task in DAG
        kwargs: keyword arguments needed for steps/tasks, forwarded to `_create_dag`
    Returns:
        A tuple which contains dag file path and run id
    """
    # The DAG is created with schedule_interval=None and is started explicitly
    # through trigger_dag below.
    created_dag = self._create_dag(
        schedule_interval=None,
        description=description,
        tags=tags,
        on_step_start=on_step_start,
        on_step_end=on_step_end,
        **kwargs
    )
    dag_id, dag_file_path = created_dag

    return dag_file_path, trigger_dag(dag_id=dag_id, conf={})

In [None]:
# | eval: false

# End-to-end check of AirflowAWSBatchExecutor.execute: build a testing AWS Batch
# environment, run a two-step DAG with start/end CLI hooks, wait for the run to
# finish and always remove the generated DAG file afterwards.
region = "eu-west-1"
with tempfile.TemporaryDirectory() as d:
    data_path_url, model_path_url = setup_test_paths(d)

    steps = [
        ClassCLICommand(
            executor_name="test-executor", class_name="MyTestExecutor", f_name="f"
        ),
        ClassCLICommand(
            executor_name="test-executor", class_name="MyTestExecutor", f_name="g"
        ),
    ]
    # presumably one execution environment per step, in order -- confirm against
    # AirflowAWSBatchExecutor
    exec_environments = ["training", "predictions"]
    on_step_start = SimpleCLICommand(command="sleep {step_count}")
    on_step_end = SimpleCLICommand(command="echo step {step_count} completed")

    td = Path(d)
    env_config_path = td / "env_config.yaml"
    created_env_info_path = td / "output_file.yaml"
    # Random suffix in the prefix keeps resource names of separate runs apart
    _create_default_batch_environment_config(
        prefix=f"airflow_batch_execute_testing_{generate_random_string()}",
        output_path=env_config_path,
        regions=[region],
    )

    with open(env_config_path) as f:
        env_config = yaml.safe_load(f)
    display(f"{env_config=}")
    with create_testing_batch_environment_ctx(
        input_yaml_path=env_config_path, output_yaml_path=created_env_info_path
    ):
        abe = AirflowAWSBatchExecutor(
            steps=steps,
            region=region,
            exec_environments=exec_environments,
            batch_environment_arn_path=created_env_info_path,
        )

        dag_file_path, run_id = abe.execute(
            description="test description",
            tags="test_tag",
            on_step_start=on_step_start,
            on_step_end=on_step_end,
            data_path_url=data_path_url,
            model_path_url=model_path_url,
        )
        try:
            display(dag_file_path)
            display(run_id)

            # The DAG id is the DAG file name without its ".py" suffix
            dag_id = dag_file_path.stem
            state = wait_for_run_to_complete(
                dag_id=dag_id, run_id=run_id, timeout=3600
            )
            display(state)
        finally:
            # Remove the DAG file even when waiting fails or times out, so no DAG
            # referencing the torn-down batch environment is left behind
            dag_file_path.unlink(missing_ok=True)

paths=[Path('/tmp/tmp2pf6bm4k/data'), Path('/tmp/tmp2pf6bm4k/model')]


"env_config={'eu-west-1': {'csv_processing': {'compute_environment': {'instance_type': 'r5.16xlarge', 'max_instances': 10, 'min_instances': 0, 'name': 'airflow_batch_execute_testing_CSF42Z_csv_processing_compute_environment'}, 'job_definition': {'image': 'registry.gitlab.com/airt.ai/airt-service:dev', 'name': 'airflow_batch_execute_testing_CSF42Z_csv_processing_job_definition'}, 'job_queue': {'name': 'airflow_batch_execute_testing_CSF42Z_csv_processing_job_queue', 'priority': 100}}, 'predictions': {'compute_environment': {'instance_type': 'g4dn.xlarge', 'max_instances': 10, 'min_instances': 0, 'name': 'airflow_batch_execute_testing_CSF42Z_predictions_compute_environment'}, 'job_definition': {'image': 'registry.gitlab.com/airt.ai/airt-service:dev', 'name': 'airflow_batch_execute_testing_CSF42Z_predictions_job_definition'}, 'job_queue': {'name': 'airflow_batch_execute_testing_CSF42Z_predictions_job_queue', 'priority': 100}}, 'preprocessing': {'compute_environment': {'instance_type': 'r5.

task_name='csv_processing'
[INFO] airt_service.aws.batch_utils: wait(): self.arn='arn:aws:batch:eu-west-1:617504802562:compute-environment/airflow_batch_execute_testing_CSF42Z_csv_processing_compute_environment', status=CREATING, state=ENABLED
[INFO] airt_service.aws.batch_utils: wait(): self.arn='arn:aws:batch:eu-west-1:617504802562:compute-environment/airflow_batch_execute_testing_CSF42Z_csv_processing_compute_environment', status=CREATING, state=ENABLED
[INFO] airt_service.aws.batch_utils: wait(): self.arn='arn:aws:batch:eu-west-1:617504802562:compute-environment/airflow_batch_execute_testing_CSF42Z_csv_processing_compute_environment', status=VALID, state=ENABLED
compute_env.arn='arn:aws:batch:eu-west-1:617504802562:compute-environment/airflow_batch_execute_testing_CSF42Z_csv_processing_compute_environment'
[INFO] airt_service.aws.batch_utils: wait(): self.arn='arn:aws:batch:eu-west-1:617504802562:job-queue/airflow_batch_execute_testing_CSF42Z_csv_processing_job_queue', status=CREAT

Path('/root/airflow/dags/test-executor-my_test_executor-f-data-path-urllocaltmptmp2pf6bm4kdata-model-path-urllocaltmptmp2pf6bm4kmodel_test-executor-my_test_executor-g-data-path-urllocaltmptmp2pf6bm4kdata-model-path-urllocaltmptmp2pf6bm4kmodel.py')

'airt-service__2022-10-20T07:48:50.615929'

'success'

deleting job definition - csv_processing
deleting job queue - csv_processing
[INFO] airt_service.aws.batch_utils: wait(): self.arn='arn:aws:batch:eu-west-1:617504802562:job-queue/airflow_batch_execute_testing_CSF42Z_csv_processing_job_queue', status=UPDATING, state=DISABLED
[INFO] airt_service.aws.batch_utils: wait(): self.arn='arn:aws:batch:eu-west-1:617504802562:job-queue/airflow_batch_execute_testing_CSF42Z_csv_processing_job_queue', status=VALID, state=DISABLED
[INFO] airt_service.aws.batch_utils: wait(): self.arn='arn:aws:batch:eu-west-1:617504802562:job-queue/airflow_batch_execute_testing_CSF42Z_csv_processing_job_queue', status=DELETING, state=DISABLED
[INFO] airt_service.aws.batch_utils: wait(): self.arn='arn:aws:batch:eu-west-1:617504802562:job-queue/airflow_batch_execute_testing_CSF42Z_csv_processing_job_queue', status=DELETING, state=DISABLED
[INFO] airt_service.aws.batch_utils: wait(): self.arn='arn:aws:batch:eu-west-1:617504802562:job-queue/airflow_batch_execute_testing_CS

[INFO] airt_service.aws.batch_utils: wait(): self.arn='arn:aws:batch:eu-west-1:617504802562:job-queue/airflow_batch_execute_testing_CSF42Z_csv_processing_job_queue', status=DELETING, state=DISABLED
[INFO] airt_service.aws.batch_utils: wait(): self.arn='arn:aws:batch:eu-west-1:617504802562:job-queue/airflow_batch_execute_testing_CSF42Z_csv_processing_job_queue', status=DELETING, state=DISABLED
[INFO] airt_service.aws.batch_utils: wait(): self.arn='arn:aws:batch:eu-west-1:617504802562:job-queue/airflow_batch_execute_testing_CSF42Z_csv_processing_job_queue', status=DELETING, state=DISABLED
[INFO] airt_service.aws.batch_utils: wait(): self.arn='arn:aws:batch:eu-west-1:617504802562:job-queue/airflow_batch_execute_testing_CSF42Z_csv_processing_job_queue', status=DELETING, state=DISABLED
[INFO] airt_service.aws.batch_utils: wait(): self.arn='arn:aws:batch:eu-west-1:617504802562:job-queue/airflow_batch_execute_testing_CSF42Z_csv_processing_job_queue', status=DELETING, state=DISABLED
[INFO] air

[INFO] airt_service.aws.batch_utils: wait(): self.arn='arn:aws:batch:eu-west-1:617504802562:job-queue/airflow_batch_execute_testing_CSF42Z_predictions_job_queue', status=DELETING, state=DISABLED
[INFO] airt_service.aws.batch_utils: wait(): self.arn='arn:aws:batch:eu-west-1:617504802562:job-queue/airflow_batch_execute_testing_CSF42Z_predictions_job_queue', status=DELETING, state=DISABLED
[INFO] airt_service.aws.batch_utils: wait(): self.arn='arn:aws:batch:eu-west-1:617504802562:job-queue/airflow_batch_execute_testing_CSF42Z_predictions_job_queue', status=DELETING, state=DISABLED
[INFO] airt_service.aws.batch_utils: wait(): self.arn='arn:aws:batch:eu-west-1:617504802562:job-queue/airflow_batch_execute_testing_CSF42Z_predictions_job_queue', status=DELETING, state=DISABLED
[INFO] airt_service.aws.batch_utils: wait(): self.arn='arn:aws:batch:eu-west-1:617504802562:job-queue/airflow_batch_execute_testing_CSF42Z_predictions_job_queue', status=DELETING, state=DISABLED
[INFO] airt_service.aws.b

[INFO] airt_service.aws.batch_utils: wait(): self.arn='arn:aws:batch:eu-west-1:617504802562:job-queue/airflow_batch_execute_testing_CSF42Z_preprocessing_job_queue', status=DELETING, state=DISABLED
[INFO] airt_service.aws.batch_utils: wait(): self.arn='arn:aws:batch:eu-west-1:617504802562:job-queue/airflow_batch_execute_testing_CSF42Z_preprocessing_job_queue', status=DELETING, state=DISABLED
[INFO] airt_service.aws.batch_utils: wait(): self.arn='arn:aws:batch:eu-west-1:617504802562:job-queue/airflow_batch_execute_testing_CSF42Z_preprocessing_job_queue', status=DELETING, state=DISABLED
[INFO] airt_service.aws.batch_utils: wait(): self.arn='arn:aws:batch:eu-west-1:617504802562:job-queue/airflow_batch_execute_testing_CSF42Z_preprocessing_job_queue', status=DELETING, state=DISABLED
[INFO] airt_service.aws.batch_utils: wait(): self.arn='arn:aws:batch:eu-west-1:617504802562:job-queue/airflow_batch_execute_testing_CSF42Z_preprocessing_job_queue', status=DELETING, state=DISABLED
[INFO] airt_ser

[INFO] airt_service.aws.batch_utils: wait(): self.arn='arn:aws:batch:eu-west-1:617504802562:job-queue/airflow_batch_execute_testing_CSF42Z_preprocessing_job_queue', status=DELETING, state=DISABLED
[INFO] airt_service.aws.batch_utils: wait(): self.arn='arn:aws:batch:eu-west-1:617504802562:job-queue/airflow_batch_execute_testing_CSF42Z_preprocessing_job_queue', status=DELETING, state=DISABLED
[INFO] airt_service.aws.batch_utils: wait(): self.arn='arn:aws:batch:eu-west-1:617504802562:job-queue/airflow_batch_execute_testing_CSF42Z_preprocessing_job_queue', status=DELETING, state=DISABLED
[INFO] airt_service.aws.batch_utils: wait(): self.arn='arn:aws:batch:eu-west-1:617504802562:job-queue/airflow_batch_execute_testing_CSF42Z_preprocessing_job_queue', status=DELETING, state=DISABLED
[INFO] airt_service.aws.batch_utils: wait(): self.arn='arn:aws:batch:eu-west-1:617504802562:job-queue/airflow_batch_execute_testing_CSF42Z_preprocessing_job_queue', status=DELETING, state=DISABLED
[INFO] airt_ser

[INFO] airt_service.aws.batch_utils: wait(): self.arn='arn:aws:batch:eu-west-1:617504802562:job-queue/airflow_batch_execute_testing_CSF42Z_training_job_queue', status=DELETING, state=DISABLED
[INFO] airt_service.aws.batch_utils: wait(): self.arn='arn:aws:batch:eu-west-1:617504802562:job-queue/airflow_batch_execute_testing_CSF42Z_training_job_queue', status=DELETING, state=DISABLED
[INFO] airt_service.aws.batch_utils: wait(): self.arn='arn:aws:batch:eu-west-1:617504802562:job-queue/airflow_batch_execute_testing_CSF42Z_training_job_queue', status=DELETING, state=DISABLED
[INFO] airt_service.aws.batch_utils: wait(): self.arn='arn:aws:batch:eu-west-1:617504802562:job-queue/airflow_batch_execute_testing_CSF42Z_training_job_queue', status=DELETING, state=DISABLED
[INFO] airt_service.aws.batch_utils: wait(): self.arn='arn:aws:batch:eu-west-1:617504802562:job-queue/airflow_batch_execute_testing_CSF42Z_training_job_queue', status=DELETING, state=DISABLED
[INFO] airt_service.aws.batch_utils: wai

In [None]:
#| export


def _test_aws_batch_executor(region: str = "eu-west-1"):  # type: ignore
    """Run a single-step DAG through AirflowAWSBatchExecutor on a testing AWS Batch environment

    A default batch environment config is written to a temporary directory, the
    environment is created through `create_testing_batch_environment_ctx`, and one
    `MyTestExecutor.f` step is executed in the "training" environment. The final run
    state is only logged, it is not asserted. The generated DAG file is removed
    afterwards, also when waiting for the run fails.

    Args:
        region: region used for the batch environment config and for the executor
    """
    with tempfile.TemporaryDirectory() as d:
        data_path_url, model_path_url = setup_test_paths(d)

        steps = [
            ClassCLICommand(
                executor_name="test-executor", class_name="MyTestExecutor", f_name="f"
            )
        ]
        exec_environments = ["training"]

        td = Path(d)
        env_config_path = td / "env_config.yaml"
        created_env_info_path = td / "output_file.yaml"

        # Random suffix in the prefix keeps resource names of separate runs apart
        prefix = f"airflow_batch_execute_testing_{generate_random_string()}"
        regions = [region]
        _create_default_batch_environment_config(
            prefix=prefix,
            output_path=env_config_path,
            regions=regions,
        )

        with open(env_config_path) as f:
            env_config = yaml.safe_load(f)
        logger.info(f"{env_config=}")
        with create_testing_batch_environment_ctx(
            input_yaml_path=env_config_path, output_yaml_path=created_env_info_path # type: ignore
        ):
            abe = AirflowAWSBatchExecutor(
                steps=steps,
                region=region,
                exec_environments=exec_environments, # type: ignore
                batch_environment_arn_path=created_env_info_path,
            )

            dag_file_path, run_id = abe.execute(
                description="test description",
                tags="test_tag",
                data_path_url=data_path_url,
                model_path_url=model_path_url,
            )
            try:
                logger.info(f"{dag_file_path=}")
                logger.info(f"{run_id=}")

                # The DAG id is the DAG file name without its ".py" suffix
                dag_id = dag_file_path.stem
                state = wait_for_run_to_complete(
                    dag_id=dag_id, run_id=run_id, timeout=1200
                )
                logger.info(f"{state=}")
            finally:
                # Remove the DAG file even when waiting fails or times out, so no
                # DAG referencing the torn-down batch environment is left behind
                dag_file_path.unlink(missing_ok=True)

In [None]:
#| export


@call_parse
def test_aws_batch_executor(region: Param("region", str) = "eu-west-1"):  # type: ignore
    """
    Create a throwaway environment for AWS Batch and execute the airflow batch executor in it
    """
    # Thin command-line entry point: all the work happens in _test_aws_batch_executor
    _test_aws_batch_executor(region=region)

In [None]:
# | eval: false
# Run the entry point with its default region ("eu-west-1")
test_aws_batch_executor()

paths=[Path('/tmp/tmp0a2_1jin/data'), Path('/tmp/tmp0a2_1jin/model')]
[INFO] __main__: env_config={'eu-west-1': {'csv_processing': {'compute_environment': {'instance_type': 'r5.16xlarge', 'max_instances': 10, 'min_instances': 0, 'name': 'airflow_batch_execute_testing_Z6PM5M_csv_processing_compute_environment'}, 'job_definition': {'image': 'registry.gitlab.com/airt.ai/airt-service:dev', 'name': 'airflow_batch_execute_testing_Z6PM5M_csv_processing_job_definition'}, 'job_queue': {'name': 'airflow_batch_execute_testing_Z6PM5M_csv_processing_job_queue', 'priority': 100}}, 'predictions': {'compute_environment': {'instance_type': 'g4dn.xlarge', 'max_instances': 10, 'min_instances': 0, 'name': 'airflow_batch_execute_testing_Z6PM5M_predictions_compute_environment'}, 'job_definition': {'image': 'registry.gitlab.com/airt.ai/airt-service:dev', 'name': 'airflow_batch_execute_testing_Z6PM5M_predictions_job_definition'}, 'job_queue': {'name': 'airflow_batch_execute_testing_Z6PM5M_predictions_job_queu

[{'dag_id': 'test-executor-my_test_executor-f-data-path-urllocaltmptmp0a2_1jindata-model-path-urllocaltmptmp0a2_1jinmodel', 'run_id': 'airt-service__2022-10-20T08:17:33.883971', 'state': 'running', 'execution_date': '2022-10-20T08:17:37+00:00', 'start_date': '2022-10-20T08:17:38.668217+00:00', 'end_date': ''}]
[INFO] __main__: dag_file_path=Path('/root/airflow/dags/test-executor-my_test_executor-f-data-path-urllocaltmptmp0a2_1jindata-model-path-urllocaltmptmp0a2_1jinmodel.py')
[INFO] __main__: run_id='airt-service__2022-10-20T08:17:33.883971'
[INFO] __main__: state='success'
deleting job definition - csv_processing
deleting job queue - csv_processing
[INFO] airt_service.aws.batch_utils: wait(): self.arn='arn:aws:batch:eu-west-1:617504802562:job-queue/airflow_batch_execute_testing_Z6PM5M_csv_processing_job_queue', status=UPDATING, state=DISABLED
[INFO] airt_service.aws.batch_utils: wait(): self.arn='arn:aws:batch:eu-west-1:617504802562:job-queue/airflow_batch_execute_testing_Z6PM5M_csv_

[INFO] airt_service.aws.batch_utils: wait(): self.arn='arn:aws:batch:eu-west-1:617504802562:job-queue/airflow_batch_execute_testing_Z6PM5M_csv_processing_job_queue', status=DELETING, state=DISABLED
[INFO] airt_service.aws.batch_utils: wait(): self.arn='arn:aws:batch:eu-west-1:617504802562:job-queue/airflow_batch_execute_testing_Z6PM5M_csv_processing_job_queue', status=DELETING, state=DISABLED
[INFO] airt_service.aws.batch_utils: wait(): self.arn='arn:aws:batch:eu-west-1:617504802562:job-queue/airflow_batch_execute_testing_Z6PM5M_csv_processing_job_queue', status=DELETING, state=DISABLED
[INFO] airt_service.aws.batch_utils: wait(): self.arn='arn:aws:batch:eu-west-1:617504802562:job-queue/airflow_batch_execute_testing_Z6PM5M_csv_processing_job_queue', status=DELETING, state=DISABLED
[INFO] airt_service.aws.batch_utils: wait(): self.arn='arn:aws:batch:eu-west-1:617504802562:job-queue/airflow_batch_execute_testing_Z6PM5M_csv_processing_job_queue', status=DELETING, state=DISABLED
[INFO] air

[INFO] airt_service.aws.batch_utils: wait(): self.arn='arn:aws:batch:eu-west-1:617504802562:job-queue/airflow_batch_execute_testing_Z6PM5M_predictions_job_queue', status=DELETING, state=DISABLED
[INFO] airt_service.aws.batch_utils: wait(): self.arn='arn:aws:batch:eu-west-1:617504802562:job-queue/airflow_batch_execute_testing_Z6PM5M_predictions_job_queue', status=DELETING, state=DISABLED
[INFO] airt_service.aws.batch_utils: wait(): self.arn='arn:aws:batch:eu-west-1:617504802562:job-queue/airflow_batch_execute_testing_Z6PM5M_predictions_job_queue', status=DELETING, state=DISABLED
[INFO] airt_service.aws.batch_utils: wait(): self.arn='arn:aws:batch:eu-west-1:617504802562:job-queue/airflow_batch_execute_testing_Z6PM5M_predictions_job_queue', status=DELETING, state=DISABLED
[INFO] airt_service.aws.batch_utils: wait(): self.arn='arn:aws:batch:eu-west-1:617504802562:job-queue/airflow_batch_execute_testing_Z6PM5M_predictions_job_queue', status=DELETING, state=DISABLED
[INFO] airt_service.aws.b

[INFO] airt_service.aws.batch_utils: wait(): self.arn='arn:aws:batch:eu-west-1:617504802562:job-queue/airflow_batch_execute_testing_Z6PM5M_preprocessing_job_queue', status=DELETING, state=DISABLED
[INFO] airt_service.aws.batch_utils: wait(): self.arn='arn:aws:batch:eu-west-1:617504802562:job-queue/airflow_batch_execute_testing_Z6PM5M_preprocessing_job_queue', status=DELETING, state=DISABLED
[INFO] airt_service.aws.batch_utils: wait(): self.arn='arn:aws:batch:eu-west-1:617504802562:job-queue/airflow_batch_execute_testing_Z6PM5M_preprocessing_job_queue', status=DELETING, state=DISABLED
[INFO] airt_service.aws.batch_utils: wait(): self.arn='arn:aws:batch:eu-west-1:617504802562:job-queue/airflow_batch_execute_testing_Z6PM5M_preprocessing_job_queue', status=DELETING, state=DISABLED
[INFO] airt_service.aws.batch_utils: wait(): self.arn='arn:aws:batch:eu-west-1:617504802562:job-queue/airflow_batch_execute_testing_Z6PM5M_preprocessing_job_queue', status=DELETING, state=DISABLED
[INFO] airt_ser

[INFO] airt_service.aws.batch_utils: wait(): self.arn='arn:aws:batch:eu-west-1:617504802562:job-queue/airflow_batch_execute_testing_Z6PM5M_training_job_queue', status=DELETING, state=DISABLED
[INFO] airt_service.aws.batch_utils: wait(): self.arn='arn:aws:batch:eu-west-1:617504802562:job-queue/airflow_batch_execute_testing_Z6PM5M_training_job_queue', status=DELETING, state=DISABLED
[INFO] airt_service.aws.batch_utils: wait(): self.arn='arn:aws:batch:eu-west-1:617504802562:job-queue/airflow_batch_execute_testing_Z6PM5M_training_job_queue', status=DELETING, state=DISABLED
[INFO] airt_service.aws.batch_utils: wait(): self.arn='arn:aws:batch:eu-west-1:617504802562:job-queue/airflow_batch_execute_testing_Z6PM5M_training_job_queue', status=DELETING, state=DISABLED
[INFO] airt_service.aws.batch_utils: wait(): self.arn='arn:aws:batch:eu-west-1:617504802562:job-queue/airflow_batch_execute_testing_Z6PM5M_training_job_queue', status=DELETING, state=DISABLED
[INFO] airt_service.aws.batch_utils: wai