In [None]:
#| default_exp components.progress_status

In [None]:
#| include: false

from airt.testing import activate_by_import

[INFO] airt.testing.activate_by_import: Testing environment activated.


In [None]:
#| export

from typing import *

In [None]:
#| exporti

from time import sleep
from datetime import datetime, timedelta

from tqdm import tqdm

from fastcore.foundation import patch

from airt.components.client import Client
from airt.logger import get_logger, set_level

In [None]:
#| include: false

import os
from contextlib import contextmanager

import logging
import pytest

import airt.sanitizer
from airt.docstring.helpers import run_examples_from_docstring
from airt.constant import SERVICE_USERNAME, SERVICE_PASSWORD

In [None]:
#| exporti

# Module-level logger for this component, created through airt's get_logger helper.
logger = get_logger(__name__)

In [None]:
#| include: false

# Sanity check: the logger created above is expected to run at INFO level.
display(logger.getEffectiveLevel())
assert logger.getEffectiveLevel() == logging.INFO

# Emit one message per level so the cell output shows which ones get through.
logger.debug("This is a debug message")
logger.info("This is an info")
logger.warning("This is a warning")
logger.error("This is an error")

20

[INFO] __main__: This is an info
[ERROR] __main__: This is an error


In [None]:
# S3 location passed as the `uri` argument when the docstring examples are run below.
TEST_S3_URI = "s3://test-airt-service/ecommerce_behavior_notebooks"

In [None]:
#| export


class ProgressStatus:
    """A base class for querying status of a remote operation.

    Here's an example of using the ProgressStatus class to check the upload status of the datablob

    Example:
        ```python
        # Importing necessary libraries
        from  airt.client import Client, DataBlob

        # Authenticate
        Client.get_token(username="{fill in username}", password="{fill in password}")

        # Create a datablob
        # In this example, the datablob will be stored in an AWS S3 bucket. The region
        # is set to eu-west-3, feel free to change the cloud provider and the region
        # to suit your needs.
        db = DataBlob.from_s3(
            uri="{fill in uri}",
            cloud_provider="aws",
            region="eu-west-3"
        )

        # Check the ready status of the datablob
        print(db.is_ready())

        # Display the status in a progress bar
        db.progress_bar()

        # Check the ready status of the datablob
        # If the upload is successful, True will be returned
        print(db.is_ready())
        ```
    """

    def __init__(self, relative_url: str, sleep_for: Union[int, float] = 5, timeout: int = 0):
        """Constructs a new ProgressStatus instance.

        Warning:
            Do not construct this object directly by calling the constructor, please use either progress_bar,
            is_ready, or wait methods of `DataBlob`, `DataSource`, `Model` or `Prediction` classes instead.

        Args:
            relative_url: Relative URI to query the status of the remote operation.
            sleep_for: The time interval in seconds between successive API calls.
            timeout: The maximum time allowed in seconds for the asynchronous call to complete. If the
                operation has not completed within this time, the progress bar is terminated with a
                TimeoutError. A value of 0 (the default) disables the timeout.
        """
        self.relative_url = relative_url
        self.sleep_for = sleep_for
        self.timeout = timeout

    def is_ready(self) -> bool:
        """Check if the method's progress is complete.

        Returns:
            **True** if the progress is completed, else **False**.

        Here's an example of checking the upload status of the datablob:

        Example:
            ```python
            # Importing necessary libraries
            from  airt.client import Client, DataBlob

            # Authenticate
            Client.get_token(username="{fill in username}", password="{fill in password}")

            # Create a datablob
            # In this example, the datablob will be stored in an AWS S3 bucket. The region
            # is set to eu-west-3, feel free to change the cloud provider and the region
            # to suit your needs.
            db = DataBlob.from_s3(
                uri="{fill in uri}",
                cloud_provider="aws",
                region="eu-west-3"
            )

            # Check the ready status of the datablob
            print(db.is_ready())

            # Display the status in a progress bar
            db.progress_bar()

            # Check the ready status of the datablob
            # If the upload is successful, True will be returned
            print(db.is_ready())
            ```
        """
        # The operation is complete once every reported step has been finished.
        response = Client._get_data(relative_url=self.relative_url)
        return response["completed_steps"] == response["total_steps"]

    def progress_bar(self) -> None:
        """Blocks the execution and displays a progress bar showing the remote action progress.

        Raises:
            ConnectionError: If the server address is invalid or not reachable.
            TimeoutError: If the remote action does not complete within `timeout` seconds.

        Here's an example of checking the upload status of the datablob:

        Example:
            ```python
            # Importing necessary libraries
            from  airt.client import Client, DataBlob

            # Authenticate
            Client.get_token(username="{fill in username}", password="{fill in password}")

            # Create a datablob
            # In this example, the datablob will be stored in an AWS S3 bucket. The region
            # is set to eu-west-3, feel free to change the cloud provider and the region
            # to suit your needs.
            db = DataBlob.from_s3(
                uri="{fill in uri}",
                cloud_provider="aws",
                region="eu-west-3"
            )

            # Check the ready status of the datablob
            print(db.is_ready())

            # Display the status in a progress bar
            db.progress_bar()

            # Check the ready status of the datablob
            # If the upload is successful, True will be returned
            print(db.is_ready())
            ```
        """
        total_steps = Client._get_data(relative_url=self.relative_url)["total_steps"]
        with tqdm(total=total_steps) as pbar:
            started_at = datetime.now()
            # Number of steps already shown on the bar. tqdm's update() expects an
            # increment, while the server reports a cumulative count, so only the
            # difference since the previous poll may be passed on.
            reported_steps = 0
            while True:
                # A timeout of 0 (or less) means "wait indefinitely".
                if (0 < self.timeout) and (datetime.now() - started_at) > timedelta(
                    seconds=self.timeout
                ):
                    raise TimeoutError()

                response = Client._get_data(relative_url=self.relative_url)
                completed_steps = response["completed_steps"]

                pbar.update(completed_steps - reported_steps)
                reported_steps = completed_steps

                if completed_steps == total_steps:
                    break

                sleep(self.sleep_for)

    def wait(self):
        # Placeholder only: the working implementation is attached to the class
        # later in this notebook through fastcore's @patch.
        raise NotImplementedError()

In [None]:
# Run example for ProgressStatus

# Credentials are read from the environment so no secrets are stored in the notebook.
username = os.environ[SERVICE_USERNAME]
password = os.environ[SERVICE_PASSWORD]

# NOTE(review): presumably the keyword arguments fill the matching "{fill in ...}"
# placeholders of the docstring example before it is executed - confirm against
# airt.docstring.helpers.
run_examples_from_docstring(
    ProgressStatus,
    username=username,
    password=password,
    uri=TEST_S3_URI
)

In [None]:
# Run example for ProgressStatus.is_ready

# Credentials are read from the environment so no secrets are stored in the notebook.
username = os.environ[SERVICE_USERNAME]
password = os.environ[SERVICE_PASSWORD]

# NOTE(review): presumably the keyword arguments fill the matching "{fill in ...}"
# placeholders of the docstring example - confirm against airt.docstring.helpers.
run_examples_from_docstring(
    ProgressStatus.is_ready,
    username=username,
    password=password,
    uri=TEST_S3_URI
)

<module>:3: No type or annotation for returned value 1
<module>:4: No type or annotation for returned value 2


In [None]:
# Run example for ProgressStatus.progress_bar

# Credentials are read from the environment so no secrets are stored in the notebook.
username = os.environ[SERVICE_USERNAME]
password = os.environ[SERVICE_PASSWORD]

# NOTE(review): presumably the keyword arguments fill the matching "{fill in ...}"
# placeholders of the docstring example - confirm against airt.docstring.helpers.
run_examples_from_docstring(
    ProgressStatus.progress_bar,
    username=username,
    password=password,
    uri=TEST_S3_URI
)

Failed to parse annotation from 'Name' node: 'NoneType' object has no attribute 'resolve'
Failed to parse annotation from 'Name' node: 'NoneType' object has no attribute 'resolve'
<module>:5: Failed to get 'exception: description' pair from ''


In [None]:
#| include: false

# context manager for creating a datablob

@contextmanager
def generate_datablob():
    """Authenticate against the service and request a new datablob from S3.

    Credentials for the service and for AWS are read from environment variables.

    Yields:
        The response returned by the POST request to `/datablob/from_s3`.
    """
    # 1. Authenticate with the service credentials from the environment
    Client.get_token(
        username=os.environ[SERVICE_USERNAME],
        password=os.environ[SERVICE_PASSWORD],
    )

    # 2. Ask the service to create a datablob from the S3 location
    payload = {
        "uri": "s3://test-airt-service/account_312571_events",
        "access_key": os.environ["AWS_ACCESS_KEY_ID"],
        "secret_key": os.environ["AWS_SECRET_ACCESS_KEY"],
    }

    yield Client._post_data(relative_url="/datablob/from_s3", json=payload)
    

with generate_datablob() as db:
    # Smoke check: the response must carry a uuid of 36 characters
    # (the length of a canonical UUID string: 32 hex digits plus 4 hyphens).
    display(f"{db['uuid']=}")
    assert len(db['uuid']) == 36
    

"db['uuid']='25159c3b-82ce-4054-bf0b-088505a61b22'"

In [None]:
#| include: false

# Tests for ProgressStatus.progress_bar()
# Checking positive scenario.

with generate_datablob() as db:
    
    status = ProgressStatus(relative_url=f"/datablob/{db['uuid']}")

    # Right after the request the datablob is expected to still be processing.
    assert not status.is_ready()
    # Blocks until the completed steps reach the total steps.
    status.progress_bar()
    display(f"{status.is_ready()=}")
    assert status.is_ready()

100%|██████████| 1/1 [00:10<00:00, 10.13s/it]


'status.is_ready()=True'

In [None]:
# Tests for ProgressStatus.progress_bar
# Testing negative scenario. Passing an all-zero datablob uuid and checking that
# progress_bar raises a ValueError.
TEST_UUID_V4 = "00000000-0000-0000-0000-000000000000"
with pytest.raises(ValueError) as e:
    status = ProgressStatus(relative_url=f"/datablob/{TEST_UUID_V4}")
    status.progress_bar()

display(f"{e.value=}")

"e.value=ValueError('The datablob uuid is incorrect. Please try again.')"

In [None]:
#| export


@patch
def wait(self: ProgressStatus):
    """Blocks execution until the remote action has completed.

    The status is polled every `sleep_for` seconds. When `timeout` is greater
    than zero and that many seconds have elapsed, polling is abandoned.

    Raises:
        ConnectionError: If the server address is invalid or not reachable.
        TimeoutError: in case of timeout.

    Here's an example of preventing further calls from being executed until the datablob upload is complete:

    Example:
        ```python
        # Importing necessary libraries
        from  airt.client import Client, DataBlob

        # Authenticate
        Client.get_token(username="{fill in username}", password="{fill in password}")

        # Create a datablob
        # In this example, the datablob will be stored in an AWS S3 bucket. The region
        # is set to eu-west-3, feel free to change the cloud provider and the region
        # to suit your needs.
        db = DataBlob.from_s3(
            uri="{fill in uri}",
            cloud_provider="aws",
            region="eu-west-3"
        )

        # Further calls to the API will be blocked until the datablob upload is complete.
        db.wait()

        # Print the details of the newly created datablob
        # If the upload is successful, the ready flag should be set to True
        print(db.details())
        ```
    """
    began = datetime.now()
    while True:
        # The deadline is checked before each poll; a timeout of 0 disables it.
        timed_out = (self.timeout > 0) and (
            (datetime.now() - began) > timedelta(seconds=self.timeout)
        )
        if timed_out:
            raise TimeoutError()

        if not self.is_ready():
            # Not finished yet: pause before asking the server again.
            sleep(self.sleep_for)
            continue

        return

In [None]:
# Run example for ProgressStatus.wait

# Credentials are read from the environment so no secrets are stored in the notebook.
username = os.environ[SERVICE_USERNAME]
password = os.environ[SERVICE_PASSWORD]

# NOTE(review): presumably the keyword arguments fill the matching "{fill in ...}"
# placeholders of the docstring example - confirm against airt.docstring.helpers.
run_examples_from_docstring(
    ProgressStatus.wait,
    username=username,
    password=password,
    uri=TEST_S3_URI
)

Failed to parse annotation from 'Name' node: 'NoneType' object has no attribute 'resolve'
Failed to parse annotation from 'Name' node: 'NoneType' object has no attribute 'resolve'
<module>:5: Failed to get 'exception: description' pair from ''


In [None]:
#| include: false

# Tests for ProgressStatus.wait()
# Checking positive scenario.

with generate_datablob() as db:
    
    status = ProgressStatus(relative_url=f"/datablob/{db['uuid']}")
    
    # Right after the request the datablob is expected to still be processing.
    assert not status.is_ready()
    display(f"{status.is_ready()=}")
    # Blocks until is_ready() reports completion.
    status.wait()
    display(f"{status.is_ready()=}")
    assert status.is_ready()

'status.is_ready()=False'

'status.is_ready()=True'