In [None]:
#| default_exp data.azure_blob_storage

In [None]:
from airt.testing import activate_by_import

[INFO] airt.testing.activate_by_import: Testing environment activated.
[INFO] numexpr.utils: Note: NumExpr detected 16 cores but "NUMEXPR_MAX_THREADS" not set, so enforcing safe limit of 8.
[INFO] numexpr.utils: NumExpr defaulting to 8 threads.
[INFO] airt.keras.helpers: Using a single GPU #0 with memory_limit 1024 MB


In [None]:
#| export

import shutil
from datetime import datetime
from typing import *

from azure.identity import DefaultAzureCredential
from fastcore.script import call_parse
from fastcore.utils import *
from sqlmodel import select

from airt.logger import get_logger
from airt.remote_path import RemotePath
from airt_service.aws.utils import create_s3_datablob_path
from airt_service.azure.utils import create_azure_blob_storage_datablob_path
from airt_service.data.utils import calculate_data_object_folder_size_and_path
from airt_service.data.utils import calculate_data_object_pulled_on
from airt_service.data.utils import get_azure_blob_storage_connection_params_from_db_uri

from airt_service.db.models import get_session_with_context, DataBlob, PredictionPush
from airt_service.helpers import truncate
from airt_service.constants import METADATA_FOLDER_PATH

In [None]:
import json
import os
from datetime import timedelta

import dask.dataframe as dd
import pandas as pd
import pytest
from azure.mgmt.storage import StorageManagementClient
from fastapi import BackgroundTasks

from airt_service.azure.utils import create_azure_resource_group_storage_account_and_container
from airt_service.data.utils import create_db_uri_for_azure_blob_storage_datablob
from airt_service.db.models import (
    DataSource,
    User,
    create_user_for_testing,
    get_session,
)
from airt_service.model.train import TrainRequest, train_model, predict_model
from airt_service.helpers import (
    commit_or_rollback,
    set_env_variable_context,
    #     create_s3_prediction_path,
)

In [None]:
# Create a user for the tests in this notebook; later cells look the user up
# again by this username.
test_username = create_user_for_testing(subscription_type="small")
display(test_username)

'kfkeinzylr'

In [None]:
#| exporti

# Module-level logger, named after this module
logger = get_logger(__name__)

In [None]:
#| export


def copy_between_azure_blob_storage(
    source_remote_url: str,
    destination_remote_url: str,
    source_credential: Optional[Union[str, DefaultAzureCredential]] = None,
    destination_credential: Optional[Union[str, DefaultAzureCredential]] = None,
    datablob: Optional[DataBlob] = None,
    skip_metadata_dir: Optional[bool] = False,
) -> None:
    """Copy files from a source azure blob storage path to a destination azure blob storage path

    By default, all files are copied to the destination_remote_url. In case
    the **skip_metadata_dir** flag is set to **True**, then the **.metadata_by_airt**
    folder will not be copied to the destination_remote_url.

    Args:
        source_remote_url: Azure blob storage url where the files to copy are located
        destination_remote_url: Azure blob storage url to copy the files to
        source_credential: Source azure blob storage credential. A new
            **DefaultAzureCredential** is used when not given.
        destination_credential: Destination azure blob storage credential. A new
            **DefaultAzureCredential** is used when not given.
        datablob: Optional datablob object to calculate pulled_on field
        skip_metadata_dir: If set to **True** then the **.metadata_by_airt** folder
            will not be copied to the destination_remote_url.

    Raises:
        ValueError: If the destination folder is still empty after the copy, i.e. the
            source url is invalid or has no files available
    """
    # Function-scope import so the block does not depend on a star-import providing Path
    from pathlib import Path

    source_credential = source_credential or DefaultAzureCredential()
    destination_credential = destination_credential or DefaultAzureCredential()

    # The destination is only pushed when its context exits; the source is only pulled
    with RemotePath.from_url(
        remote_url=destination_remote_url,
        pull_on_enter=False,
        push_on_exit=True,
        exist_ok=True,
        parents=True,
        credential=destination_credential,
    ) as destination_azure_blob_storage_path:
        sync_path = destination_azure_blob_storage_path.as_path()
        with RemotePath.from_url(
            remote_url=source_remote_url,
            pull_on_enter=True,
            push_on_exit=False,
            exist_ok=True,
            parents=False,
            credential=source_credential,
        ) as source_azure_blob_storage_path:
            if datablob is not None:
                calculate_data_object_pulled_on(datablob)

            # Snapshot the listing before entries are moved out of the directory
            source_files = list(source_azure_blob_storage_path.as_path().iterdir())

            if skip_metadata_dir:
                # Compare the entry name exactly: a substring test on the whole path
                # would also drop unrelated entries whose path merely contains the
                # metadata folder name.
                # NOTE(review): assumes METADATA_FOLDER_PATH names a single top-level folder
                metadata_dir_name = Path(METADATA_FOLDER_PATH).name
                source_files = [f for f in source_files if f.name != metadata_dir_name]

            for f in source_files:
                shutil.move(str(f), sync_path)

        if not any(sync_path.iterdir()):
            raise ValueError(
                f"URI {source_remote_url} is invalid or no files available"
            )

In [None]:
# Test case for skip_metadata_dir=True

with get_session_with_context() as session:
    user = session.exec(select(User).where(User.username == test_username)).one()

    datablob = DataBlob(
        type="azure_blob_storage",
        uri="",
        source="",
        cloud_provider="azure",
        region="westeurope",
        total_steps=1,
        user=user,
    )
    with commit_or_rollback(session):
        session.add(datablob)

    # Creating source bucket
    datablob_id = session.exec(select(DataBlob).where(DataBlob.uuid == datablob.uuid)).one().id
    container_client, azure_blob_storage_path = create_azure_blob_storage_datablob_path(
        user_id=user.id, datablob_id=datablob_id, region=datablob.region
    )
    source_remote_url = f"{container_client.url}/{azure_blob_storage_path}"

    with RemotePath.from_url(
        remote_url=source_remote_url,
        pull_on_enter=False,
        push_on_exit=True,
        exist_ok=True,
        parents=True,
    ) as cache_path:
        processed_cache_path = cache_path.as_path()
        df = pd.util.testing.makeDataFrame().set_index("A")
        ddf = dd.from_pandas(df, npartitions=1)
        ddf.to_parquet(processed_cache_path)
    #     (processed_cache_path / "file-1.parquet").touch()

        metadata_folder_path = processed_cache_path / METADATA_FOLDER_PATH
        metadata_folder_path.mkdir(parents=True, exist_ok=True)

        (metadata_folder_path / "metadata-1.parquet").touch()
        (metadata_folder_path / "metadata-2.parquet").touch()

    # Creating destination bucket
    datablob = DataBlob(
        type="azure_blob_storage",
        uri="",
        source="",
        cloud_provider="azure",
        region="westeurope",
        total_steps=1,
        user=user,
    )
    with commit_or_rollback(session):
        session.add(datablob)

    datablob_id = session.exec(select(DataBlob).where(DataBlob.uuid == datablob.uuid)).one().id
    destination_container_client, destination_azure_blob_storage_path = create_azure_blob_storage_datablob_path(
        user_id=user.id, datablob_id=datablob_id, region=datablob.region
    )

    destination_remote_url = f"{destination_container_client.url}/{destination_azure_blob_storage_path}"

    print(f"{source_remote_url=}")
    print(f"{destination_remote_url=}")

    copy_between_azure_blob_storage(
        source_remote_url=source_remote_url,
        destination_remote_url=destination_remote_url,
        skip_metadata_dir=True,
    )

    # Validating the contents of the destination bucket
    with RemotePath.from_url(
        remote_url=destination_remote_url,
        pull_on_enter=True,
        push_on_exit=False,
        exist_ok=True,
        parents=False,
    ) as cache_path:
        files = list(cache_path.as_path().rglob("*.*"))
        assert len(files) == 1, len(files)
        !ls {cache_path.as_path()}

[INFO] azure.identity._credentials.environment: Environment is configured for ClientSecretCredential
[INFO] azure.identity._credentials.managed_identity: ManagedIdentityCredential will use IMDS
[INFO] azure.identity._credentials.chained: DefaultAzureCredential acquired a token from EnvironmentCredential
[INFO] azure.identity._credentials.default: DefaultAzureCredential acquired a token from EnvironmentCredential
[INFO] azure.identity._credentials.environment: Environment is configured for ClientSecretCredential
[INFO] azure.identity._credentials.managed_identity: ManagedIdentityCredential will use IMDS
[INFO] airt.remote_path: RemotePath.from_url(): creating remote path with the following url https://kumsairtsdevwesteurope.blob.core.windows.net/kumsairtsdevwesteurope/135/datablob/170
[INFO] airt.remote_path: AzureBlobPath._create_cache_path(): created cache path: /tmp/httpskumsairtsdevwesteuropeblobcorewindowsnetkumsairtsdevwesteurope135datablob170_cached_1176aujv
[INFO] airt.remote_pa

  import pandas.util.testing


[INFO] azure.identity._credentials.chained: DefaultAzureCredential acquired a token from EnvironmentCredential
[INFO] azure.identity._credentials.default: DefaultAzureCredential acquired a token from EnvironmentCredential
[INFO] azure.identity._credentials.default: DefaultAzureCredential acquired a token from EnvironmentCredential
[INFO] airt.remote_path: AzureBlobPath._clean_up(): removing local cache path /tmp/httpskumsairtsdevwesteuropeblobcorewindowsnetkumsairtsdevwesteurope135datablob170_cached_1176aujv
[INFO] azure.identity._credentials.environment: Environment is configured for ClientSecretCredential
[INFO] azure.identity._credentials.managed_identity: ManagedIdentityCredential will use IMDS
[INFO] azure.identity._credentials.chained: DefaultAzureCredential acquired a token from EnvironmentCredential
[INFO] azure.identity._credentials.default: DefaultAzureCredential acquired a token from EnvironmentCredential
[INFO] azure.identity._credentials.environment: Environment is configu

In [None]:
# Test case for skip_metadata_dir=False

with get_session_with_context() as session:
    datablob = DataBlob(
        type="azure_blob_storage",
        uri="",
        source="",
        cloud_provider="azure",
        region="westeurope",
        total_steps=1,
        user=user,
    )
    with commit_or_rollback(session):
        session.add(datablob)

    datablob_id = session.exec(select(DataBlob).where(DataBlob.uuid == datablob.uuid)).one().id
    destination_container_client, destination_azure_blob_storage_path = create_azure_blob_storage_datablob_path(
        user_id=user.id, datablob_id=datablob_id, region=datablob.region
    )

    destination_remote_url = f"{destination_container_client.url}/{destination_azure_blob_storage_path}"

    print(f"{source_remote_url=}")
    print(f"{destination_remote_url=}")

    storage_account_name = create_azure_resource_group_storage_account_and_container(storage_account_region="westeurope")
    storage_client = StorageManagementClient(DefaultAzureCredential(), os.environ["AZURE_SUBSCRIPTION_ID"])
    keys = storage_client.storage_accounts.list_keys(os.environ["AZURE_RESOURCE_GROUP"], storage_account_name)
    credential = keys.keys[0].value

    copy_between_azure_blob_storage(
        source_remote_url=source_remote_url,
        destination_remote_url=destination_remote_url,
        source_credential=credential,
        destination_credential=credential,
        skip_metadata_dir=False,
    )

    # Validating the contents of the destination bucket
    with RemotePath.from_url(
        remote_url=destination_remote_url,
        pull_on_enter=True,
        push_on_exit=False,
        exist_ok=True,
        parents=False,
    ) as cache_path:
        files = list(cache_path.as_path().rglob("*.*"))
        assert len(files) == 4, len(files)
        !ls {cache_path.as_path()}

[INFO] azure.identity._credentials.environment: Environment is configured for ClientSecretCredential
[INFO] azure.identity._credentials.managed_identity: ManagedIdentityCredential will use IMDS
[INFO] azure.identity._credentials.chained: DefaultAzureCredential acquired a token from EnvironmentCredential
[INFO] azure.identity._credentials.default: DefaultAzureCredential acquired a token from EnvironmentCredential
[INFO] azure.identity._credentials.environment: Environment is configured for ClientSecretCredential
[INFO] azure.identity._credentials.managed_identity: ManagedIdentityCredential will use IMDS
source_remote_url='https://kumsairtsdevwesteurope.blob.core.windows.net/kumsairtsdevwesteurope/135/datablob/170'
destination_remote_url='https://kumsairtsdevwesteurope.blob.core.windows.net/kumsairtsdevwesteurope/135/datablob/172'
[INFO] azure.identity._credentials.environment: Environment is configured for ClientSecretCredential
[INFO] azure.identity._credentials.managed_identity: Manag

In [None]:
#| export


@call_parse
def azure_blob_storage_pull(datablob_id: int):  # type: ignore
    """Pull the data from azure blob storage and updates progress in db

    The files are copied into the datablob's own storage location, which is either an
    S3 path or an azure blob storage path depending on the datablob's cloud provider.
    Errors raised while copying are stored (truncated) in the datablob's error field
    instead of being raised.

    Args:
        datablob_id: Id of datablob in db

    Example:
        The following code executes a CLI command:
        ```
        azure_blob_storage_pull 1
        ```
    """
    with get_session_with_context() as session:
        datablob = session.exec(
            select(DataBlob).where(DataBlob.id == datablob_id)
        ).one()

        # Reset the state left behind by any previous pull attempt
        datablob.error = None
        datablob.completed_steps = 0
        datablob.folder_size = None
        datablob.path = None

        # Parsed outside the try block: a failure here propagates to the caller
        # instead of being stored in datablob.error
        (
            source_remote_url,
            source_credential,
        ) = get_azure_blob_storage_connection_params_from_db_uri(db_uri=datablob.uri)

        try:
            if datablob.cloud_provider == "aws":
                destination_bucket, s3_path = create_s3_datablob_path(
                    user_id=datablob.user.id,
                    datablob_id=datablob.id,
                    region=datablob.region,
                )
                destination_remote_url = f"s3://{destination_bucket.name}/{s3_path}"
            elif datablob.cloud_provider == "azure":
                (
                    destination_container_client,
                    destination_azure_blob_storage_path,
                ) = create_azure_blob_storage_datablob_path(
                    user_id=datablob.user.id,
                    datablob_id=datablob.id,
                    region=datablob.region,
                )
                destination_remote_url = f"{destination_container_client.url}/{destination_azure_blob_storage_path}"
            else:
                # Without this branch destination_remote_url would be unbound and the
                # stored error would be a cryptic unbound-variable message
                raise ValueError(
                    f"Unsupported cloud provider: {datablob.cloud_provider}"
                )

            with RemotePath.from_url(
                remote_url=destination_remote_url,
                pull_on_enter=False,
                push_on_exit=True,
                exist_ok=True,
                parents=True,
            ) as destination_remote_path:
                sync_path = destination_remote_path.as_path()
                with RemotePath.from_url(
                    remote_url=source_remote_url,
                    pull_on_enter=True,
                    push_on_exit=False,
                    exist_ok=True,
                    parents=False,
                    credential=source_credential,
                ) as source_azure_blob_storage_path:
                    calculate_data_object_pulled_on(datablob)

                    # Move everything that was pulled into the destination's local folder
                    source_files = source_azure_blob_storage_path.as_path().iterdir()
                    for f in source_files:
                        shutil.move(str(f), sync_path)

                if len(list(sync_path.glob("*"))) == 0:
                    raise ValueError(
                        f"URI {source_remote_url} is invalid or no files available"
                    )

            # Calculate folder size in S3/Azure blob storage
            calculate_data_object_folder_size_and_path(datablob)
        except Exception as e:
            datablob.error = truncate(str(e))

        session.add(datablob)
        session.commit()

In [None]:
# Pull an existing azure blob storage folder into an azure-hosted datablob
with get_session_with_context() as session:
    user = session.exec(select(User).where(User.username == test_username)).one()
    uri = "https://testairtservice.blob.core.windows.net/test-container/account_312571_events"
    region = "westeurope"

    # Fetch a storage account key to use as the source credential
    storage_client = StorageManagementClient(
        DefaultAzureCredential(), os.environ["AZURE_SUBSCRIPTION_ID"]
    )
    keys = storage_client.storage_accounts.list_keys(
        "test-airt-service", "testairtservice"
    )
    credential = keys.keys[0].value
    datablob = DataBlob(
        type="azure_blob_storage",
        uri=create_db_uri_for_azure_blob_storage_datablob(
            uri=uri,
            credential=credential,
        ),
        source=uri,
        cloud_provider="azure",
        region=region,
        total_steps=1,
        user=user,
    )
    with commit_or_rollback(session):
        session.add(datablob)

    assert not datablob.folder_size
    assert not datablob.path

    datablob_id = session.exec(select(DataBlob).where(DataBlob.uuid == datablob.uuid)).one().id
    azure_blob_storage_pull(datablob_id=datablob_id)
    user_id = user.id

# Re-read the datablob in a fresh session to check what the pull persisted
with get_session_with_context() as session:
    datablob = session.exec(select(DataBlob).where(DataBlob.id == datablob_id)).one()
    display(datablob)
    assert datablob.folder_size == 11219613, datablob.folder_size
    assert f"{region}/{user_id}/datablob/{datablob_id}" in datablob.path, datablob.path

[INFO] azure.identity._credentials.environment: Environment is configured for ClientSecretCredential
[INFO] azure.identity._credentials.managed_identity: ManagedIdentityCredential will use IMDS
[INFO] azure.identity._credentials.chained: DefaultAzureCredential acquired a token from EnvironmentCredential
[INFO] azure.identity._credentials.environment: Environment is configured for ClientSecretCredential
[INFO] azure.identity._credentials.managed_identity: ManagedIdentityCredential will use IMDS
[INFO] azure.identity._credentials.chained: DefaultAzureCredential acquired a token from EnvironmentCredential
[INFO] azure.identity._credentials.default: DefaultAzureCredential acquired a token from EnvironmentCredential
[INFO] azure.identity._credentials.environment: Environment is configured for ClientSecretCredential
[INFO] azure.identity._credentials.managed_identity: ManagedIdentityCredential will use IMDS
[INFO] airt.remote_path: RemotePath.from_url(): creating remote path with the followi

DataBlob(id=173, uuid=UUID('1bac0124-980c-4e45-8e10-2edef38a8ee0'), type='azure_blob_storage', uri='https://<REDACTED>@testairtservice.blob.core.windows.net/test-container/account_312571_events', source='https://testairtservice.blob.core.windows.net/test-container/account_312571_events', total_steps=1, completed_steps=1, folder_size=11219613, cloud_provider=<CloudProvider.azure: 'azure'>, region='westeurope', error=None, disabled=False, path='https://kumsairtsdevwesteurope.blob.core.windows.net/kumsairtsdevwesteurope/135/datablob/173', created=datetime.datetime(2022, 9, 13, 11, 19, 52), user_id=135, pulled_on=datetime.datetime(2022, 9, 13, 11, 20, 1), tags=[])

In [None]:
# Pull an existing azure blob storage folder into an aws-hosted datablob
with get_session_with_context() as session:
    user = session.exec(select(User).where(User.username == test_username)).one()
    uri = "https://testairtservice.blob.core.windows.net/test-container/account_312571_events"
    region = "westeurope"

    # Fetch a storage account key to use as the source credential
    storage_client = StorageManagementClient(
        DefaultAzureCredential(), os.environ["AZURE_SUBSCRIPTION_ID"]
    )
    keys = storage_client.storage_accounts.list_keys(
        "test-airt-service", "testairtservice"
    )
    credential = keys.keys[0].value
    datablob = DataBlob(
        type="azure_blob_storage",
        uri=create_db_uri_for_azure_blob_storage_datablob(
            uri=uri,
            credential=credential,
        ),
        source=uri,
        cloud_provider="aws",
        region="eu-west-3",
        total_steps=1,
        user=user,
    )
    with commit_or_rollback(session):
        session.add(datablob)

    assert not datablob.folder_size
    assert not datablob.path

    datablob_id = session.exec(select(DataBlob).where(DataBlob.uuid == datablob.uuid)).one().id
    azure_blob_storage_pull(datablob_id=datablob_id)
    user_id = user.id

# Re-read the datablob in a fresh session to check what the pull persisted
with get_session_with_context() as session:
    datablob = session.exec(select(DataBlob).where(DataBlob.id == datablob_id)).one()
    display(datablob)
    assert datablob.folder_size == 11219613, datablob.folder_size
    assert (
        datablob.path
        == f"s3://{os.environ['STORAGE_BUCKET_PREFIX']}-eu-west-3/{user_id}/datablob/{datablob.id}"
    ), datablob.path

[INFO] azure.identity._credentials.environment: Environment is configured for ClientSecretCredential
[INFO] azure.identity._credentials.managed_identity: ManagedIdentityCredential will use IMDS
[INFO] azure.identity._credentials.chained: DefaultAzureCredential acquired a token from EnvironmentCredential
[INFO] botocore.credentials: Found credentials in environment variables.
[INFO] airt.remote_path: RemotePath.from_url(): creating remote path with the following url s3://kumaran-airt-service-eu-west-3/135/datablob/175
[INFO] airt.remote_path: S3Path._create_cache_path(): created cache path: /tmp/s3kumaran-airt-service-eu-west-3135datablob175_cached_ljuy4_ku
[INFO] airt.remote_path: S3Path.__init__(): created object for accessing s3://kumaran-airt-service-eu-west-3/135/datablob/175 locally in /tmp/s3kumaran-airt-service-eu-west-3135datablob175_cached_ljuy4_ku
[INFO] airt.remote_path: RemotePath.from_url(): creating remote path with the following url https://testairtservice.blob.core.wind

DataBlob(id=175, uuid=UUID('2ae5aed8-0db3-47d4-9e15-44b84e9a4d8f'), type='azure_blob_storage', uri='https://<REDACTED>@testairtservice.blob.core.windows.net/test-container/account_312571_events', source='https://testairtservice.blob.core.windows.net/test-container/account_312571_events', total_steps=1, completed_steps=1, folder_size=11219613, cloud_provider=<CloudProvider.aws: 'aws'>, region='eu-west-3', error=None, disabled=False, path='s3://kumaran-airt-service-eu-west-3/135/datablob/175', created=datetime.datetime(2022, 9, 13, 11, 20, 19), user_id=135, pulled_on=datetime.datetime(2022, 9, 13, 11, 20, 25), tags=[])

In [None]:
# Pulling from a folder that does not exist must store an error on the datablob
# and leave its folder_size and path unset.
uri = "https://testairtservice.blob.core.windows.net/test-container/folder_does_not_exists"
region = "westeurope"

with get_session_with_context() as session:
    user = session.exec(select(User).where(User.username == test_username)).one()

    # Fetch a storage account key to use as the source credential
    storage_client = StorageManagementClient(
        DefaultAzureCredential(), os.environ["AZURE_SUBSCRIPTION_ID"]
    )
    keys = storage_client.storage_accounts.list_keys(
        "test-airt-service", "testairtservice"
    )
    credential = keys.keys[0].value

    db_uri = create_db_uri_for_azure_blob_storage_datablob(
        uri=uri,
        credential=credential,
    )
    datablob = DataBlob(
        type="azure_blob_storage",
        uri=db_uri,
        source=uri,
        cloud_provider="azure",
        region=region,
        total_steps=1,
        user=user,
    )
    with commit_or_rollback(session):
        session.add(datablob)

    assert not datablob.folder_size
    assert not datablob.path

    stored_datablob = session.exec(
        select(DataBlob).where(DataBlob.uuid == datablob.uuid)
    ).one()
    datablob_id = stored_datablob.id
    azure_blob_storage_pull(datablob_id=datablob_id)

# Re-read the datablob in a fresh session to check what the pull persisted
with get_session_with_context() as session:
    datablob = session.exec(select(DataBlob).where(DataBlob.id == datablob_id)).one()
    display(datablob)
    assert f"URI {uri} is invalid or no files available" in datablob.error
    assert not datablob.folder_size
    assert not datablob.path

[INFO] azure.identity._credentials.environment: Environment is configured for ClientSecretCredential
[INFO] azure.identity._credentials.managed_identity: ManagedIdentityCredential will use IMDS
[INFO] azure.identity._credentials.chained: DefaultAzureCredential acquired a token from EnvironmentCredential
[INFO] azure.identity._credentials.environment: Environment is configured for ClientSecretCredential
[INFO] azure.identity._credentials.managed_identity: ManagedIdentityCredential will use IMDS
[INFO] azure.identity._credentials.chained: DefaultAzureCredential acquired a token from EnvironmentCredential
[INFO] azure.identity._credentials.default: DefaultAzureCredential acquired a token from EnvironmentCredential
[INFO] azure.identity._credentials.environment: Environment is configured for ClientSecretCredential
[INFO] azure.identity._credentials.managed_identity: ManagedIdentityCredential will use IMDS
[INFO] airt.remote_path: RemotePath.from_url(): creating remote path with the followi

DataBlob(id=177, uuid=UUID('78e4e376-32b2-48b0-9047-5fed4dd56975'), type='azure_blob_storage', uri='https://<REDACTED>@testairtservice.blob.core.windows.net/test-container/folder_does_not_exists', source='https://testairtservice.blob.core.windows.net/test-container/folder_does_not_exists', total_steps=1, completed_steps=0, folder_size=None, cloud_provider=<CloudProvider.azure: 'azure'>, region='westeurope', error='URI https://testairtservice.blob.core.windows.net/test-container/folder_does_not_exists is invalid or no files available', disabled=False, path=None, created=datetime.datetime(2022, 9, 13, 11, 20, 47), user_id=135, pulled_on=datetime.datetime(2022, 9, 13, 11, 20, 53), tags=[])

In [None]:
#| export


@call_parse
def azure_blob_storage_push(prediction_push_id: int):  # type: ignore
    """Push the data to azure blob storage and update its progress in db

    The prediction's files are pulled from the prediction's path and moved into the
    destination folder, which is pushed to azure blob storage afterwards. Any error
    raised on the way is stored (truncated) on the prediction push instead of being raised.

    Args:
        prediction_push_id: Id of prediction_push

    Example:
        The following code executes a CLI command:
        ```azure_blob_storage_push 1
        ```
    """
    with get_session_with_context() as session:
        push_query = select(PredictionPush).where(
            PredictionPush.id == prediction_push_id
        )
        prediction_push = session.exec(push_query).one()

        # Start from a clean state
        prediction_push.error = None
        prediction_push.completed_steps = 0

        connection_params = get_azure_blob_storage_connection_params_from_db_uri(
            db_uri=prediction_push.uri
        )
        destination_uri, destination_credential = connection_params

        try:
            # The destination is only pushed on exit; the source is only pulled on enter
            with RemotePath.from_url(
                remote_url=destination_uri,
                pull_on_enter=False,
                push_on_exit=True,
                exist_ok=True,
                parents=True,
                credential=destination_credential,
            ) as target_remote_path:
                upload_dir = target_remote_path.as_path()
                with RemotePath.from_url(
                    remote_url=prediction_push.prediction.path,
                    pull_on_enter=True,
                    push_on_exit=False,
                    exist_ok=True,
                    parents=False,
                ) as prediction_remote_path:
                    for entry in prediction_remote_path.as_path().iterdir():
                        shutil.move(str(entry), upload_dir)
            prediction_push.completed_steps = 1
        except Exception as e:
            prediction_push.error = truncate(str(e))

        session.add(prediction_push)
        session.commit()

In [None]:
# End-to-end test of azure_blob_storage_push.
# Reuses `datablob` / `datablob_id` defined by the previous test cell.
with get_session_with_context() as session:
    user = session.exec(select(User).where(User.username == test_username)).one()

    with commit_or_rollback(session):
        datasource = DataSource(
            datablob_id=datablob_id,
            cloud_provider=datablob.cloud_provider,
            region=datablob.region,
            total_steps=1,
            user=user,
        )
        # Add explicitly instead of relying on the relationship backref cascade
        # to pull the new object into the session
        session.add(datasource)

    train_request = TrainRequest(
        data_uuid=datasource.uuid,
        client_column="AccountId",
        target_column="DefinitionId",
        target="load*",
        predict_after=timedelta(seconds=20 * 24 * 60 * 60),
    )

    model = train_model(train_request=train_request, user=user, session=session)

    b = BackgroundTasks()
    with set_env_variable_context(variable="JOB_EXECUTOR", value="fastapi"):
        prediction = predict_model(
            model_uuid=model.uuid, user=user, session=session, background_tasks=b
        )
    display(prediction)

    # Put some files at the location that is then recorded as the prediction's path
    container_client, azure_blob_storage_path = create_azure_blob_storage_datablob_path(
        user_id=user.id, datablob_id=datablob_id, region=datablob.region
    )
    destination_remote_url = f"{container_client.url}/{azure_blob_storage_path}"
    copy_between_azure_blob_storage(
        source_remote_url="https://testairtservice.blob.core.windows.net/test-container/account_312571_events",
        destination_remote_url=destination_remote_url,
    )

    with commit_or_rollback(session):
        prediction.path = f"{container_client.url}/{azure_blob_storage_path}"
        session.add(prediction)

    # Fetch a storage account key to use as the push destination credential
    storage_client = StorageManagementClient(
        DefaultAzureCredential(), os.environ["AZURE_SUBSCRIPTION_ID"]
    )
    keys = storage_client.storage_accounts.list_keys(
        "test-airt-service", "testairtservice"
    )
    credential = keys.keys[0].value

    prediction_push = PredictionPush(
        total_steps=1,
        prediction_id=prediction.id,
        uri=create_db_uri_for_azure_blob_storage_datablob(
            uri="https://testairtservice.blob.core.windows.net/test-container/push",
            credential=credential,
        ),
    )
    session.add(prediction_push)
    session.commit()
    display(prediction_push)

    assert prediction_push.completed_steps == 0

    # Capture the id while the session is open so the check below does not
    # read it from a detached instance
    prediction_push_id = prediction_push.id
    azure_blob_storage_push(prediction_push_id=prediction_push_id)

# Re-read the prediction push in a fresh session to check what the push persisted
with get_session_with_context() as session:
    prediction_push = session.exec(
        select(PredictionPush).where(PredictionPush.id == prediction_push_id)
    ).one()
    display(prediction_push)
    assert prediction_push.completed_steps == prediction_push.total_steps

[INFO] airt_service.batch_job: create_batch_job(): command='predict 30', task='csv_processing'
[INFO] airt_service.batch_job_components.base: Entering FastAPIBatchJobContext(task=csv_processing)
[INFO] airt_service.batch_job: batch_ctx=FastAPIBatchJobContext(task=csv_processing)
[INFO] airt_service.batch_job_components.fastapi: FastAPIBatchJobContext.create_job(self=FastAPIBatchJobContext(task=csv_processing), command='predict 30', environment_vars={'AWS_ACCESS_KEY_ID': '<REDACTED>', 'AWS_SECRET_ACCESS_KEY': '<REDACTED>', 'AWS_DEFAULT_REGION': 'eu-west-1', 'AZURE_SUBSCRIPTION_ID': '<REDACTED>', 'AZURE_TENANT_ID': '<REDACTED>', 'AZURE_CLIENT_ID': '<REDACTED>', 'AZURE_CLIENT_SECRET': '<REDACTED>', 'AZURE_STORAGE_ACCOUNT_PREFIX': 'kumsairtsdev', 'AZURE_RESOURCE_GROUP': 'kumaran-airt-service-dev', 'AIRT_SERVICE_SUPER_USER_PASSWORD': '<REDACTED>

Prediction(completed_steps=0, path=None, cloud_provider=<CloudProvider.azure: 'azure'>, region='westeurope', created=datetime.datetime(2022, 9, 13, 11, 20, 54), uuid=UUID('e1f99b50-65c9-46aa-92ed-55ad19cfd900'), model_id=27, total_steps=3, datasource_id=72, id=30, error=None, disabled=False)

[INFO] azure.identity._credentials.environment: Environment is configured for ClientSecretCredential
[INFO] azure.identity._credentials.managed_identity: ManagedIdentityCredential will use IMDS
[INFO] azure.identity._credentials.chained: DefaultAzureCredential acquired a token from EnvironmentCredential
[INFO] azure.identity._credentials.default: DefaultAzureCredential acquired a token from EnvironmentCredential
[INFO] azure.identity._credentials.environment: Environment is configured for ClientSecretCredential
[INFO] azure.identity._credentials.managed_identity: ManagedIdentityCredential will use IMDS
[INFO] azure.identity._credentials.environment: Environment is configured for ClientSecretCredential
[INFO] azure.identity._credentials.managed_identity: ManagedIdentityCredential will use IMDS
[INFO] azure.identity._credentials.environment: Environment is configured for ClientSecretCredential
[INFO] azure.identity._credentials.managed_identity: ManagedIdentityCredential will use IMDS
[I

PredictionPush(id=22, uuid=UUID('79dde342-3cd7-4e63-b9f6-e5cacc4896f5'), uri='https://<REDACTED>@testairtservice.blob.core.windows.net/test-container/push', total_steps=1, completed_steps=0, error=None, created=datetime.datetime(2022, 9, 13, 11, 21, 19), prediction_id=30, )

[INFO] airt.remote_path: RemotePath.from_url(): creating remote path with the following url https://testairtservice.blob.core.windows.net/test-container/push
[INFO] airt.remote_path: AzureBlobPath._create_cache_path(): created cache path: /tmp/httpstestairtserviceblobcorewindowsnettest-containerpush_cached_w980debr
[INFO] airt.remote_path: AzureBlobPath.__init__(): created object for accessing https://testairtservice.blob.core.windows.net/test-container/push locally in /tmp/httpstestairtserviceblobcorewindowsnettest-containerpush_cached_w980debr
[INFO] airt.remote_path: RemotePath.from_url(): creating remote path with the following url https://kumsairtsdevwesteurope.blob.core.windows.net/kumsairtsdevwesteurope/135/datablob/177
[INFO] airt.remote_path: AzureBlobPath._create_cache_path(): created cache path: /tmp/httpskumsairtsdevwesteuropeblobcorewindowsnetkumsairtsdevwesteurope135datablob177_cached_b4a9pwu4
[INFO] airt.remote_path: AzureBlobPath.__init__(): created object for accessing

PredictionPush(id=22, uuid=UUID('79dde342-3cd7-4e63-b9f6-e5cacc4896f5'), uri='https://<REDACTED>@testairtservice.blob.core.windows.net/test-container/push', total_steps=1, completed_steps=1, error=None, created=datetime.datetime(2022, 9, 13, 11, 21, 19), prediction_id=30, )