In [None]:
# | default_exp azure.batch_utils

In [None]:
from airt.testing import activate_by_import

[INFO] airt.testing.activate_by_import: Testing environment activated.


2023-02-26 13:59:24.939426: E tensorflow/stream_executor/cuda/cuda_blas.cc:2981] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


[INFO] numexpr.utils: Note: NumExpr detected 16 cores but "NUMEXPR_MAX_THREADS" not set, so enforcing safe limit of 8.
[INFO] numexpr.utils: NumExpr defaulting to 8 threads.


In [None]:
# | export

import io
import logging
import os
import random
import string
from contextlib import ContextDecorator, contextmanager
from datetime import timedelta
from time import sleep
from typing import *

import azure.batch.models as batchmodels
from airt.logger import get_logger
from azure.batch import BatchServiceClient
from azure.batch.models import BatchErrorException
from azure.common.credentials import ServicePrincipalCredentials
from azure.identity import DefaultAzureCredential
from azure.mgmt.batch import BatchManagementClient

import airt_service.sanitizer

In [None]:
# | exporti

# Module-level logger shared by the classes and functions in this module.
logger = get_logger(__name__)

In [None]:
# | exporti

# Raise the Azure SDK HTTP logging policy logger to WARNING so that its
# excessive logging is suppressed.

logging.getLogger("azure.core.pipeline.policies.http_logging_policy").setLevel(
    logging.WARNING
)

In [None]:
# | export


def get_random_string(length: int = 6) -> str:
    """Build a random identifier from uppercase ASCII letters and digits.

    Args:
        length: Number of characters to generate

    Returns:
        A string made of ``length`` randomly chosen characters
    """
    alphabet = string.ascii_uppercase + string.digits
    picked = []
    for _ in range(length):
        # Not used for anything security sensitive, hence the plain PRNG.
        picked.append(random.choice(alphabet))  # nosec B311
    return "".join(picked)

In [None]:
# Smoke test: the helper must return a string of exactly the requested length.
actual = get_random_string(length=10)
display(actual)
assert isinstance(actual, str)
assert len(actual) == 10

'CDLDPFODT2'

In [None]:
# | export

# Default autoscale formula used by `BatchPool.create` when the caller does not
# pass one. It derives the target node count from the pending-task samples and
# caps it with `min($targetVMs, 5)`.
# NOTE(review): the comments embedded in the formula say the pool is capped at
# 20, but the expression caps it at 5 - confirm which value is intended before
# changing either one.
AUTO_SCALE_FORMULA = """// Get pending tasks for the past 5 minutes.
$samples = $PendingTasks.GetSamplePercent(TimeInterval_Minute * 5);
// If we have fewer than 70 percent data points, we use the last sample point,
// otherwise we use the maximum of last sample point and the history average.
$tasks = $samples < 70 ? max(0,$PendingTasks.GetSample(1)) : max( $PendingTasks.GetSample(1), avg($PendingTasks.GetSample(TimeInterval_Minute * 5)));
// If number of pending tasks is not 0, set targetVM to pending tasks, otherwise
// half of current dedicated.
$targetVMs = $tasks > 0? $tasks:max(0, $TargetDedicatedNodes/2);
// The pool size is capped at 20, if target VM value is more than that, set it
// to 20. This value should be adjusted according to your use case.
$TargetDedicatedNodes = max(0, min($targetVMs, 5));
// Set node deallocation mode - let running tasks finish before removing a node
$NodeDeallocationOption = taskcompletion;"""

In [None]:
# Display the raw default formula string.
AUTO_SCALE_FORMULA

'// Get pending tasks for the past 5 minutes.\n$samples = $PendingTasks.GetSamplePercent(TimeInterval_Minute * 5);\n// If we have fewer than 70 percent data points, we use the last sample point,\n// otherwise we use the maximum of last sample point and the history average.\n$tasks = $samples < 70 ? max(0,$PendingTasks.GetSample(1)) : max( $PendingTasks.GetSample(1), avg($PendingTasks.GetSample(TimeInterval_Minute * 5)));\n// If number of pending tasks is not 0, set targetVM to pending tasks, otherwise\n// half of current dedicated.\n$targetVMs = $tasks > 0? $tasks:max(0, $TargetDedicatedNodes/2);\n// The pool size is capped at 20, if target VM value is more than that, set it\n// to 20. This value should be adjusted according to your use case.\n$TargetDedicatedNodes = max(0, min($targetVMs, 5));\n// Set node deallocation mode - let running tasks finish before removing a node\n$NodeDeallocationOption = taskcompletion;'

In [None]:
# | export


class BatchPool(ContextDecorator):
    """Handle to an Azure Batch pool that can be used as a context manager.

    The object only stores the pool name and the connection details; each
    operation builds its own ``BatchServiceClient``. When an instance is used
    in a ``with`` statement, the pool is deleted when the block is left.
    """

    def __init__(
        self,
        name: str,
        batch_account_name: str,
        region: str,
        service_principal_credentials: ServicePrincipalCredentials,
    ):
        """Store the pool name and connection details; no request is made.

        Args:
            name: Id of the pool
            batch_account_name: Name of the Batch account, used in the service URL
            region: Region of the Batch account, used in the service URL
            service_principal_credentials: Credentials passed to the service client
        """
        self.name = name
        self.batch_account_name = batch_account_name
        self.region = region
        self.service_principal_credentials = service_principal_credentials

    def _client(self) -> BatchServiceClient:
        """Build a Batch service client for this pool's account and region."""
        return BatchServiceClient(
            self.service_principal_credentials,
            batch_url=f"https://{self.batch_account_name}.{self.region}.batch.azure.com",
        )

    @classmethod
    def from_name(
        cls,
        name: str,
        batch_account_name: str,
        region: str,
        service_principal_credentials: ServicePrincipalCredentials,
    ) -> "BatchPool":
        """Return a handle to a pool that is looked up by name.

        Args:
            name: Id of the pool to look up
            batch_account_name: Name of the Batch account
            region: Region of the Batch account
            service_principal_credentials: Credentials passed to the service client

        Returns:
            A handle to the pool

        Raises:
            Whatever the lookup request raises; nothing is caught here
        """
        batch_pool = cls(
            name, batch_account_name, region, service_principal_credentials
        )
        # The result is not needed; the call is made so a failed lookup raises.
        batch_pool._client().pool.get(name)
        return batch_pool

    @classmethod
    def create(
        cls,
        *,
        name: Optional[str] = None,
        batch_account_name: str,
        region: str,
        service_principal_credentials: ServicePrincipalCredentials,
        image_publisher: str = "microsoft-azure-batch",
        image_offer: str = "ubuntu-server-container",
        image_sku: str = "20-04-lts",
        image_version: str = "latest",
        container_image: str = "ghcr.io/airtai/airt-service:dev",
        docker_compatible: bool = False,
        vm: str = "standard_d2s_v3",
        auto_scale_formula: Optional[str] = None,
        node_agent_sku_id: str = "batch.node.ubuntu 20.04",
    ) -> "BatchPool":
        """Create an autoscaling pool, or return a handle if the lookup succeeds.

        Args:
            name: Id of the pool; a random ``batch-pool-XXXXXX`` id is used if None
            batch_account_name: Name of the Batch account
            region: Region of the Batch account
            service_principal_credentials: Credentials passed to the service client
            image_publisher: Publisher of the VM image
            image_offer: Offer of the VM image
            image_sku: SKU of the VM image
            image_version: Version of the VM image
            container_image: Currently unused; kept so existing callers keep working
            docker_compatible: If True, the pool is created with a container
                configuration
            vm: VM size of the pool nodes
            auto_scale_formula: Autoscale formula; ``AUTO_SCALE_FORMULA`` is used if None
            node_agent_sku_id: Node agent SKU; it has to match the chosen image

        Returns:
            A handle to the existing or newly created pool
        """
        if name is None:
            name = f"batch-pool-{get_random_string()}"

        if auto_scale_formula is None:
            auto_scale_formula = AUTO_SCALE_FORMULA

        batch_pool = cls(
            name, batch_account_name, region, service_principal_credentials
        )
        batch_service_client = batch_pool._client()

        try:
            batch_service_client.pool.get(name)
        except BatchErrorException:
            # Deliberately broad: any failed lookup is treated as "the pool is
            # not there yet" and we fall through to creating it.
            pass
        else:
            return batch_pool

        container_configuration = (
            batchmodels.ContainerConfiguration() if docker_compatible else None
        )

        new_pool = batchmodels.PoolAddParameter(
            id=name,
            virtual_machine_configuration=batchmodels.VirtualMachineConfiguration(
                image_reference=batchmodels.ImageReference(
                    publisher=image_publisher,
                    offer=image_offer,
                    sku=image_sku,
                    version=image_version,
                ),
                node_agent_sku_id=node_agent_sku_id,
                container_configuration=container_configuration,
            ),
            vm_size=vm,
            enable_auto_scale=True,
            auto_scale_formula=auto_scale_formula,
            auto_scale_evaluation_interval=timedelta(minutes=5),
        )

        batch_service_client.pool.add(new_pool)
        batch_pool.wait(state="active")
        return batch_pool

    def wait(
        self,
        state: str,
        timeout: int = 0,
        sleep_step: int = 5,
    ) -> None:
        """Poll the pool until it reports the given state

        Args:
            state: State to wait for ('active'|'deleting')
            timeout: Maximum time in seconds to keep polling. If 0 (the default),
                polling continues until the state is reached. Only the time spent
                sleeping is counted, and reaching the timeout is logged, not raised
            sleep_step: Number of seconds to sleep between two polls
        """
        batch_service_client = self._client()

        elapsed = 0
        while True:
            if 0 < timeout <= elapsed:
                logger.info(
                    f"wait timedout after {elapsed:,d} seconds for pool: '{self.name}'"
                )
                break
            current_state = batch_service_client.pool.get(self.name).as_dict()["state"]
            logger.info(f"wait(): {self.name=}, state={current_state}")
            if current_state == state:
                break
            sleep(sleep_step)
            elapsed = elapsed + sleep_step

    def delete(self) -> None:
        """Delete the pool; a pool already marked for deletion is not an error"""
        try:
            self._client().pool.delete(self.name)
        except BatchErrorException as e:
            message = getattr(e, "message", None)
            details = message.as_dict() if hasattr(message, "as_dict") else {}
            if "marked for deletion" in (details.get("value") or ""):
                return
            raise

    def __enter__(self) -> "BatchPool":
        return self

    def __exit__(self, *exc: Any) -> None:
        # Returns None, so an exception raised inside the with-block propagates.
        self.delete()
        self.wait(state="deleting")

In [None]:
# Build service principal credentials for the Batch endpoint from environment
# variables.
service_principal_credentials = ServicePrincipalCredentials(
    client_id=os.environ["AZURE_CLIENT_ID"],
    secret=os.environ["AZURE_CLIENT_SECRET"],
    tenant=os.environ["AZURE_TENANT_ID"],
    resource="https://batch.core.windows.net/",
)

# Create a pool with a random name; leaving the with-block deletes it again.
with BatchPool.create(
    batch_account_name=os.environ["AZURE_BATCH_ACCOUNT"],
    region="westeurope",
    service_principal_credentials=service_principal_credentials,
) as batch_pool:
    display(f"{batch_pool.name=}")

[INFO] __main__: wait(): self.name='batch-pool-ASFHN1', state=active


"batch_pool.name='batch-pool-ASFHN1'"

[INFO] __main__: wait(): self.name='batch-pool-ASFHN1', state=deleting


In [None]:
# | export


class BatchJob(ContextDecorator):
    """Handle to an Azure Batch job that can be used as a context manager.

    Connection details are copied from the pool the job belongs to. When an
    instance is used in a ``with`` statement, the job is deleted when the block
    is left.
    """

    def __init__(self, name: str, batch_pool: BatchPool):
        """Store the job name and copy the connection details from the pool.

        Args:
            name: Id of the job
            batch_pool: Pool the job belongs to
        """
        self.name = name
        self.batch_account_name = batch_pool.batch_account_name
        self.region = batch_pool.region
        self.service_principal_credentials = batch_pool.service_principal_credentials
        self.batch_pool = batch_pool

    def _client(self) -> BatchServiceClient:
        """Build a Batch service client for this job's account and region."""
        return BatchServiceClient(
            self.service_principal_credentials,
            batch_url=f"https://{self.batch_account_name}.{self.region}.batch.azure.com",
        )

    @classmethod
    def from_name(cls, name: str, batch_pool: BatchPool) -> "BatchJob":
        """Return a handle to a job that is looked up by name.

        Args:
            name: Id of the job to look up
            batch_pool: Pool whose connection details are used

        Returns:
            A handle to the job

        Raises:
            Whatever the lookup request raises; nothing is caught here
        """
        batch_job = cls(name, batch_pool)
        # The result is not needed; the call is made so a failed lookup raises.
        batch_job._client().job.get(name)
        return batch_job

    @classmethod
    def create(cls, *, name: Optional[str] = None, batch_pool: BatchPool) -> "BatchJob":
        """Create a job on the pool, or return a handle if the lookup succeeds.

        Args:
            name: Id of the job; a random ``batch-job-XXXXXX`` id is used if None
            batch_pool: Pool the job is created on

        Returns:
            A handle to the existing or newly created job
        """
        if name is None:
            name = f"batch-job-{get_random_string()}"

        batch_job = cls(name, batch_pool)
        batch_service_client = batch_job._client()

        try:
            batch_service_client.job.get(name)
        except BatchErrorException:
            # Deliberately broad: any failed lookup is treated as "the job is
            # not there yet" and we fall through to creating it.
            pass
        else:
            return batch_job

        new_job = batchmodels.JobAddParameter(
            id=name, pool_info=batchmodels.PoolInformation(pool_id=batch_pool.name)
        )

        batch_service_client.job.add(new_job)

        batch_job.wait(state="active")
        return batch_job

    def wait(
        self,
        state: str,
        timeout: int = 0,
        sleep_step: int = 5,
    ) -> None:
        """Poll the job until it reports the given state

        Args:
            state: State to wait for ('active'|'deleting')
            timeout: Maximum time in seconds to keep polling. If 0 (the default),
                polling continues until the state is reached. Only the time spent
                sleeping is counted, and reaching the timeout is logged, not raised
            sleep_step: Number of seconds to sleep between two polls
        """
        batch_service_client = self._client()

        elapsed = 0
        while True:
            if 0 < timeout <= elapsed:
                logger.info(
                    f"wait timedout after {elapsed:,d} seconds for job: '{self.name}'"
                )
                break
            current_state = batch_service_client.job.get(self.name).as_dict()["state"]
            logger.info(f"wait(): {self.name=}, state={current_state}")
            if current_state == state:
                break
            sleep(sleep_step)
            elapsed = elapsed + sleep_step

    def delete(self) -> None:
        """Delete the job; a job that does not exist is not an error"""
        try:
            self._client().job.delete(self.name)
        except BatchErrorException as e:
            message = getattr(e, "message", None)
            details = message.as_dict() if hasattr(message, "as_dict") else {}
            if "job does not exist" in (details.get("value") or ""):
                return
            raise

    def __enter__(self) -> "BatchJob":
        return self

    def __exit__(self, *exc: Any) -> None:
        # Only requests the deletion; it does not wait for the job to disappear.
        # Returns None, so an exception raised inside the with-block propagates.
        self.delete()

In [None]:
# Build service principal credentials for the Batch endpoint from environment
# variables.
service_principal_credentials = ServicePrincipalCredentials(
    client_id=os.environ["AZURE_CLIENT_ID"],
    secret=os.environ["AZURE_CLIENT_SECRET"],
    tenant=os.environ["AZURE_TENANT_ID"],
    resource="https://batch.core.windows.net/",
)

# Create a temporary pool and a job on it; the job is deleted when the inner
# block is left and the pool when the outer block is left.
with BatchPool.create(
    batch_account_name=os.environ["AZURE_BATCH_ACCOUNT"],
    region="westeurope",
    service_principal_credentials=service_principal_credentials,
) as batch_pool:
    display(f"{batch_pool.name=}")
    with BatchJob.create(batch_pool=batch_pool) as batch_job:
        display(f"{batch_job.name=}")

[INFO] __main__: wait(): self.name='batch-pool-OXW33Z', state=active


"batch_pool.name='batch-pool-OXW33Z'"

[INFO] __main__: wait(): self.name='batch-job-G3PV3V', state=active


"batch_job.name='batch-job-G3PV3V'"

[INFO] __main__: wait(): self.name='batch-pool-OXW33Z', state=deleting


In [None]:
# | export


class BatchTask(ContextDecorator):
    """Handle to an Azure Batch task that can be used as a context manager.

    Connection details are copied from the job the task belongs to. When an
    instance is used in a ``with`` statement, leaving the block waits for the
    task to complete, logs its output and deletes it.
    """

    # Every state a task can be in once it has been submitted. A task may leave
    # 'active' before the first poll, so waiting for exactly 'active' after
    # submission could block forever.
    _SUBMITTED_STATES = ("active", "preparing", "running", "completed")

    def __init__(self, name: str, batch_job: BatchJob):
        """Store the task name and copy the connection details from the job.

        Args:
            name: Id of the task
            batch_job: Job the task belongs to
        """
        self.name = name
        self.batch_account_name = batch_job.batch_account_name
        self.region = batch_job.region
        self.service_principal_credentials = batch_job.service_principal_credentials
        self.batch_job = batch_job

    def _client(self) -> BatchServiceClient:
        """Build a Batch service client for this task's account and region."""
        return BatchServiceClient(
            self.service_principal_credentials,
            batch_url=f"https://{self.batch_account_name}.{self.region}.batch.azure.com",
        )

    @classmethod
    def from_name(cls, name: str, batch_job: BatchJob) -> "BatchTask":
        """Return a handle to a task that is looked up by name.

        Args:
            name: Id of the task to look up
            batch_job: Job the task belongs to

        Returns:
            A handle to the task

        Raises:
            Whatever the lookup request raises; nothing is caught here
        """
        batch_task = cls(name, batch_job)
        # The result is not needed; the call is made so a failed lookup raises.
        batch_task._client().task.get(batch_job.name, name)
        return batch_task

    @classmethod
    def create(
        cls,
        *,
        name: Optional[str] = None,
        command: str,
        container_settings: Optional[batchmodels.TaskContainerSettings] = None,
        environment_vars: Optional[Dict[str, str]] = None,
        batch_job: BatchJob,
    ) -> "BatchTask":
        """Submit a task to the job, or return a handle if the lookup succeeds.

        Args:
            name: Id of the task; a random ``batch-task-XXXXXX`` id is used if None
            command: Command line the task runs
            container_settings: Optional container settings for the task
            environment_vars: Optional environment variables set for the task
            batch_job: Job the task is added to

        Returns:
            A handle to the existing or newly submitted task
        """
        if name is None:
            name = f"batch-task-{get_random_string()}"

        batch_task = cls(name, batch_job)
        batch_service_client = batch_task._client()

        try:
            batch_service_client.task.get(batch_job.name, name)
        except BatchErrorException:
            # Deliberately broad: any failed lookup is treated as "the task is
            # not there yet" and we fall through to submitting it.
            pass
        else:
            return batch_task

        environment_settings = [
            batchmodels.EnvironmentSetting(name=key, value=value)
            for key, value in (environment_vars or {}).items()
        ]

        new_task = batchmodels.TaskAddParameter(
            id=name,
            command_line=command,
            container_settings=container_settings,
            environment_settings=environment_settings,
        )

        batch_service_client.task.add_collection(batch_job.name, [new_task])

        # Accept any post-submission state instead of exactly 'active'.
        batch_task.wait(state=cls._SUBMITTED_STATES)
        return batch_task

    def wait(
        self,
        state: Union[str, Iterable[str]],
        timeout: int = 0,
        sleep_step: int = 5,
    ) -> None:
        """Poll the task until it reports the given state

        Args:
            state: State to wait for ('active'|'preparing'|'running'|'completed'),
                or a collection of states, any of which ends the wait
            timeout: Maximum time in seconds to keep polling. If 0 (the default),
                polling continues until the state is reached. Only the time spent
                sleeping is counted, and reaching the timeout is logged, not raised
            sleep_step: Number of seconds to sleep between two polls
        """
        accepted_states = {state} if isinstance(state, str) else set(state)
        batch_service_client = self._client()

        elapsed = 0
        while True:
            if 0 < timeout <= elapsed:
                logger.info(
                    f"wait timedout after {elapsed:,d} seconds for task: '{self.name}'"
                )
                break
            task = batch_service_client.task.get(self.batch_job.name, self.name)
            current_state = task.as_dict()["state"]
            logger.info(f"wait(): {self.name=}, state={current_state}")
            if current_state in accepted_states:
                break
            sleep(sleep_step)
            elapsed = elapsed + sleep_step

    def delete(self) -> None:
        """Delete the task; any error raised by the service is propagated"""
        self._client().task.delete(self.batch_job.name, self.name)

    def output(self) -> None:
        """Log the task's ``stdout.txt``, or ``stderr.txt`` if stdout cannot be fetched"""
        batch_service_client = self._client()

        try:
            stream = batch_service_client.file.get_from_task(
                self.batch_job.name, self.name, "stdout.txt"
            )
        except BatchErrorException:
            stream = batch_service_client.file.get_from_task(
                self.batch_job.name, self.name, "stderr.txt"
            )

        # The stream yields the file content in chunks of bytes.
        file_text = b"".join(stream).decode("utf-8")

        logger.info(f"task output is: {file_text}")

    def __enter__(self) -> "BatchTask":
        return self

    def __exit__(self, *exc: Any) -> None:
        # Returns None, so an exception raised inside the with-block propagates
        # once the task has completed and has been cleaned up.
        self.wait(state="completed")
        self.output()
        self.delete()

In [None]:
# | eval: false

# Build service principal credentials for the Batch endpoint from environment
# variables.
service_principal_credentials = ServicePrincipalCredentials(
    client_id=os.environ["AZURE_CLIENT_ID"],
    secret=os.environ["AZURE_CLIENT_SECRET"],
    tenant=os.environ["AZURE_TENANT_ID"],
    resource="https://batch.core.windows.net/",
)

# Full round trip: temporary pool -> job -> task. Leaving the innermost block
# waits for the task to complete, logs its output and deletes it; the job and
# the pool are deleted as their blocks are left.
with BatchPool.create(
    batch_account_name=os.environ["AZURE_BATCH_ACCOUNT"],
    region="westeurope",
    service_principal_credentials=service_principal_credentials,
) as batch_pool:
    display(f"{batch_pool.name=}")
    with BatchJob.create(batch_pool=batch_pool) as batch_job:
        display(f"{batch_job.name=}")
        with BatchTask.create(
            command="echo hello_there", batch_job=batch_job
        ) as batch_task:
            display(f"{batch_task.name=}")

[INFO] __main__: wait(): self.name='batch-pool-R54IS2', state=active


"batch_pool.name='batch-pool-R54IS2'"

[INFO] __main__: wait(): self.name='batch-job-QTSSBD', state=active


"batch_job.name='batch-job-QTSSBD'"

[INFO] __main__: wait(): self.name='batch-task-VJ37SZ', state=active


"batch_task.name='batch-task-VJ37SZ'"

[INFO] __main__: wait(): self.name='batch-task-VJ37SZ', state=active
[INFO] __main__: wait(): self.name='batch-task-VJ37SZ', state=active
[INFO] __main__: wait(): self.name='batch-task-VJ37SZ', state=active
[INFO] __main__: wait(): self.name='batch-task-VJ37SZ', state=active
[INFO] __main__: wait(): self.name='batch-task-VJ37SZ', state=active
[INFO] __main__: wait(): self.name='batch-task-VJ37SZ', state=active
[INFO] __main__: wait(): self.name='batch-task-VJ37SZ', state=active
[INFO] __main__: wait(): self.name='batch-task-VJ37SZ', state=active
[INFO] __main__: wait(): self.name='batch-task-VJ37SZ', state=active
[INFO] __main__: wait(): self.name='batch-task-VJ37SZ', state=active
[INFO] __main__: wait(): self.name='batch-task-VJ37SZ', state=active
[INFO] __main__: wait(): self.name='batch-task-VJ37SZ', state=active
[INFO] __main__: wait(): self.name='batch-task-VJ37SZ', state=active
[INFO] __main__: wait(): self.name='batch-task-VJ37SZ', state=active
[INFO] __main__: wait(): self.name

In [None]:
# | export


def azure_batch_create_job(
    *,
    name: Optional[str] = None,
    command: str,
    container_settings: Optional[batchmodels.TaskContainerSettings] = None,
    environment_vars: Optional[Dict[str, str]] = None,
    batch_job_name: str,
    batch_pool_name: str,
    batch_account_name: str,
    region: str,
    service_principal_credentials: Optional[ServicePrincipalCredentials] = None,
) -> BatchTask:
    """Submit a command as a task to a job and pool that are looked up by name.

    Args:
        name: Id of the task; passed on to ``BatchTask.create``
        command: Command line the task runs
        container_settings: Optional container settings for the task
        environment_vars: Optional environment variables set for the task
        batch_job_name: Id of the job the task is added to
        batch_pool_name: Id of the pool the job belongs to
        batch_account_name: Name of the Batch account
        region: Region of the Batch account; only "westeurope" is accepted
        service_principal_credentials: Credentials to use. If None, they are built
            from the AZURE_CLIENT_ID, AZURE_CLIENT_SECRET and AZURE_TENANT_ID
            environment variables

    Returns:
        A handle to the task

    Raises:
        ValueError: If ``region`` is not "westeurope"
    """
    if region != "westeurope":
        raise ValueError("Only westeurope region is supported for now")

    credentials = service_principal_credentials
    if credentials is None:
        env = os.environ
        credentials = ServicePrincipalCredentials(
            client_id=env["AZURE_CLIENT_ID"],
            secret=env["AZURE_CLIENT_SECRET"],
            tenant=env["AZURE_TENANT_ID"],
            resource="https://batch.core.windows.net/",
        )

    # Resolve the pool first, then the job that lives on it.
    job = BatchJob.from_name(
        name=batch_job_name,
        batch_pool=BatchPool.from_name(
            name=batch_pool_name,
            batch_account_name=batch_account_name,
            region=region,
            service_principal_credentials=credentials,
        ),
    )

    batch_task = BatchTask.create(
        name=name,
        command=command,
        container_settings=container_settings,
        environment_vars=environment_vars,
        batch_job=job,
    )
    logger.info(f"{batch_task.name=}")
    return batch_task

In [None]:
# | eval: false

# Build service principal credentials for the Batch endpoint from environment
# variables.
service_principal_credentials = ServicePrincipalCredentials(
    client_id=os.environ["AZURE_CLIENT_ID"],
    secret=os.environ["AZURE_CLIENT_SECRET"],
    tenant=os.environ["AZURE_TENANT_ID"],
    resource="https://batch.core.windows.net/",
)

# Look up the pool "cpu-pool" and the job "cpu-job" by name instead of creating
# temporary ones.
batch_pool = BatchPool.from_name(
    name="cpu-pool",
    batch_account_name=os.environ["AZURE_BATCH_ACCOUNT"],
    region="westeurope",
    service_principal_credentials=service_principal_credentials,
)
display(f"{batch_pool.name=}")
batch_job = BatchJob.from_name(name="cpu-job", batch_pool=batch_pool)
display(f"{batch_job.name=}")
# Run `env` inside the container image with one extra environment variable.
batch_task = azure_batch_create_job(
    command="env",
    container_settings=batchmodels.TaskContainerSettings(
        image_name=f"ghcr.io/airtai/airt-service:dev"
    ),
    environment_vars={"DUMMY_KEY": "the dummy value"},
    batch_job_name=batch_job.name,
    batch_pool_name=batch_pool.name,
    batch_account_name=os.environ["AZURE_BATCH_ACCOUNT"],
    region="westeurope",
    service_principal_credentials=service_principal_credentials,
)
# Block until the task has completed, then log what it printed.
# The task is not deleted in this cell.
batch_task.wait(state="completed")
batch_task.output()

"batch_pool.name='cpu-pool'"

"batch_job.name='cpu-job'"

[INFO] __main__: wait(): self.name='batch-task-0B5V45', state=active
[INFO] __main__: batch_task.name='batch-task-0B5V45'
[INFO] __main__: wait(): self.name='batch-task-0B5V45', state=active
[INFO] __main__: wait(): self.name='batch-task-0B5V45', state=active
[INFO] __main__: wait(): self.name='batch-task-0B5V45', state=active
[INFO] __main__: wait(): self.name='batch-task-0B5V45', state=active
[INFO] __main__: wait(): self.name='batch-task-0B5V45', state=active
[INFO] __main__: wait(): self.name='batch-task-0B5V45', state=active
[INFO] __main__: wait(): self.name='batch-task-0B5V45', state=active
[INFO] __main__: wait(): self.name='batch-task-0B5V45', state=active
[INFO] __main__: wait(): self.name='batch-task-0B5V45', state=active
[INFO] __main__: wait(): self.name='batch-task-0B5V45', state=active
[INFO] __main__: wait(): self.name='batch-task-0B5V45', state=active
[INFO] __main__: wait(): self.name='batch-task-0B5V45', state=active
[INFO] __main__: wait(): self.name='batch-task-0B5

[INFO] __main__: wait(): self.name='batch-task-0B5V45', state=running
[INFO] __main__: wait(): self.name='batch-task-0B5V45', state=running
[INFO] __main__: wait(): self.name='batch-task-0B5V45', state=running
[INFO] __main__: wait(): self.name='batch-task-0B5V45', state=running
[INFO] __main__: wait(): self.name='batch-task-0B5V45', state=running
[INFO] __main__: wait(): self.name='batch-task-0B5V45', state=running
[INFO] __main__: wait(): self.name='batch-task-0B5V45', state=running
[INFO] __main__: wait(): self.name='batch-task-0B5V45', state=running
[INFO] __main__: wait(): self.name='batch-task-0B5V45', state=running
[INFO] __main__: wait(): self.name='batch-task-0B5V45', state=running
[INFO] __main__: wait(): self.name='batch-task-0B5V45', state=running
[INFO] __main__: wait(): self.name='batch-task-0B5V45', state=running
[INFO] __main__: wait(): self.name='batch-task-0B5V45', state=completed
[INFO] __main__: task output is: PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:

In [None]:
# | eval: false

# Build service principal credentials for the Batch endpoint from environment
# variables.
service_principal_credentials = ServicePrincipalCredentials(
    client_id=os.environ["AZURE_CLIENT_ID"],
    secret=os.environ["AZURE_CLIENT_SECRET"],
    tenant=os.environ["AZURE_TENANT_ID"],
    resource="https://batch.core.windows.net/",
)

# docker_compatible=True creates the temporary pool with a container
# configuration; the task below is submitted with container settings.
with BatchPool.create(
    batch_account_name=os.environ["AZURE_BATCH_ACCOUNT"],
    region="westeurope",
    service_principal_credentials=service_principal_credentials,
    docker_compatible=True,
) as batch_pool:
    display(f"{batch_pool.name=}")
    with BatchJob.create(batch_pool=batch_pool) as batch_job:
        display(f"{batch_job.name=}")
        batch_task = azure_batch_create_job(
            command="env",
            container_settings=batchmodels.TaskContainerSettings(
                image_name=f"ghcr.io/airtai/airt-service:dev"
            ),
            environment_vars={"DUMMY_KEY": "the dummy value"},
            batch_job_name=batch_job.name,
            batch_pool_name=batch_pool.name,
            batch_account_name=os.environ["AZURE_BATCH_ACCOUNT"],
            region="westeurope",
            service_principal_credentials=service_principal_credentials,
        )
        # The task is not used as a context manager here, so it is waited for,
        # logged and deleted explicitly.
        batch_task.wait(state="completed")
        batch_task.output()
        batch_task.delete()

[INFO] __main__: wait(): self.name='batch-pool-BND64R', state=active


"batch_pool.name='batch-pool-BND64R'"

[INFO] __main__: wait(): self.name='batch-job-UDTAO4', state=active


"batch_job.name='batch-job-UDTAO4'"

[INFO] __main__: wait(): self.name='batch-task-VSMBRQ', state=active
[INFO] __main__: batch_task.name='batch-task-VSMBRQ'
[INFO] __main__: wait(): self.name='batch-task-VSMBRQ', state=active
[INFO] __main__: wait(): self.name='batch-task-VSMBRQ', state=active
[INFO] __main__: wait(): self.name='batch-task-VSMBRQ', state=active
[INFO] __main__: wait(): self.name='batch-task-VSMBRQ', state=active
[INFO] __main__: wait(): self.name='batch-task-VSMBRQ', state=active
[INFO] __main__: wait(): self.name='batch-task-VSMBRQ', state=active
[INFO] __main__: wait(): self.name='batch-task-VSMBRQ', state=active
[INFO] __main__: wait(): self.name='batch-task-VSMBRQ', state=active
[INFO] __main__: wait(): self.name='batch-task-VSMBRQ', state=active
[INFO] __main__: wait(): self.name='batch-task-VSMBRQ', state=active
[INFO] __main__: wait(): self.name='batch-task-VSMBRQ', state=active
[INFO] __main__: wait(): self.name='batch-task-VSMBRQ', state=active
[INFO] __main__: wait(): self.name='batch-task-VSM

[INFO] __main__: wait(): self.name='batch-pool-BND64R', state=deleting


In [None]:
# | eval: false

# Build service principal credentials for the Batch endpoint from environment
# variables.
service_principal_credentials = ServicePrincipalCredentials(
    client_id=os.environ["AZURE_CLIENT_ID"],
    secret=os.environ["AZURE_CLIENT_SECRET"],
    tenant=os.environ["AZURE_TENANT_ID"],
    resource="https://batch.core.windows.net/",
)

# Same flow as the container example, but on a pool created with the default
# docker_compatible=False and without container settings on the task.
with BatchPool.create(
    batch_account_name=os.environ["AZURE_BATCH_ACCOUNT"],
    region="westeurope",
    service_principal_credentials=service_principal_credentials,
) as batch_pool:
    display(f"{batch_pool.name=}")
    with BatchJob.create(batch_pool=batch_pool) as batch_job:
        display(f"{batch_job.name=}")
        batch_task = azure_batch_create_job(
            command="env",
            #             command="echo $DUMMY_KEY",
            environment_vars={"DUMMY_KEY": "the dummy value"},
            batch_job_name=batch_job.name,
            batch_pool_name=batch_pool.name,
            batch_account_name=os.environ["AZURE_BATCH_ACCOUNT"],
            region="westeurope",
            service_principal_credentials=service_principal_credentials,
        )
        # The task is not used as a context manager here, so it is waited for,
        # logged and deleted explicitly.
        batch_task.wait(state="completed")
        batch_task.output()
        batch_task.delete()

[INFO] __main__: wait(): self.name='batch-pool-YTWUM7', state=active


"batch_pool.name='batch-pool-YTWUM7'"

[INFO] __main__: wait(): self.name='batch-job-B9NIZR', state=active


"batch_job.name='batch-job-B9NIZR'"

[INFO] __main__: wait(): self.name='batch-task-IHQOF6', state=active
[INFO] __main__: batch_task.name='batch-task-IHQOF6'
[INFO] __main__: wait(): self.name='batch-task-IHQOF6', state=active
[INFO] __main__: wait(): self.name='batch-task-IHQOF6', state=active
[INFO] __main__: wait(): self.name='batch-task-IHQOF6', state=active
[INFO] __main__: wait(): self.name='batch-task-IHQOF6', state=active
[INFO] __main__: wait(): self.name='batch-task-IHQOF6', state=active
[INFO] __main__: wait(): self.name='batch-task-IHQOF6', state=active
[INFO] __main__: wait(): self.name='batch-task-IHQOF6', state=active
[INFO] __main__: wait(): self.name='batch-task-IHQOF6', state=active
[INFO] __main__: wait(): self.name='batch-task-IHQOF6', state=active
[INFO] __main__: wait(): self.name='batch-task-IHQOF6', state=active
[INFO] __main__: wait(): self.name='batch-task-IHQOF6', state=active
[INFO] __main__: wait(): self.name='batch-task-IHQOF6', state=active
[INFO] __main__: wait(): self.name='batch-task-IHQ

In [None]:
# # export


# class BatchAccount(ContextDecorator):
#     """A class for creating and managing the azure batch account"""

#     def __init__(self, response, region):
#         """Constructs a new BatchAccount instance

#         Args:
#             response: The compute environment describe response
#         """
#         self.response = response
#         self.region = region

#     @classmethod
#     def create(
#         cls,
#         *,
#         name: Optional[str] = None,
#         resource_group_name: str,
#         location: str = "westeurope",
#     ):
#         if name is None:
#             name = f"batch-account-{get_random_string()}"

#         bmc = BatchManagementClient(
#             DefaultAzureCredential(), os.environ["AZURE_SUBSCRIPTION_ID"]
#         )
#         b_account = bmc.batch_account.begin_create(
#             resource_group_name=resource_group_name,
#             account_name=name,
#             parameters=BatchAccountCreateParameters(location=location),
#         )

#     def __enter__(self):
#         return self

#     def __exit__(self, *exc):
#         #         client = boto3.client("batch", region_name=self.region)
#         #         self.update(state="DISABLED")
#         #         self.wait(status="VALID", state="DISABLED")
#         #         self.delete()
#         return False

In [None]:
# batch_service_client = BatchServiceClient(service_principal_credentials, batch_url="https://testairtbatch.westeurope.batch.azure.com")
# bmc = BatchManagementClient(DefaultAzureCredential(), os.environ["AZURE_SUBSCRIPTION_ID"])
# help(bmc.batch_account.get_keys)