In [None]:
#| default_exp cli.ds

In [None]:
from airt.testing import activate_by_import

[INFO] airt.testing.activate_by_import: Testing environment activated.


In [None]:
#| export

from typing import *

In [None]:
#| exporti

import os

import typer
from typer import echo
from tabulate import tabulate
import datetime as dt
import pandas as pd

from airt.client import Client
from airt.cli import helper
from airt.logger import get_logger, set_level

In [None]:
import tempfile
import shutil
from pathlib import Path
from urllib.parse import quote_plus as urlquote
from contextlib import contextmanager

import logging
import pytest

from typer.testing import CliRunner
from sqlmodel import create_engine

import airt.sanitizer
from airt.components.progress_status import ProgressStatus
from airt.components.datablob import DataBlob
from airt.constant import SERVICE_USERNAME, SERVICE_PASSWORD, SERVICE_TOKEN

In [None]:
#| exporti

# Typer application object; every `@app.command()` in this notebook registers
# one datasource sub-command on it.
app = typer.Typer(
    help="A set of commands for managing datasources and training ML models on them."
)

In [None]:
# Shared test runner: the test cells call `runner.invoke(app, [...])` to run commands in-process.
runner = CliRunner()

In [None]:
#| export

# Module-level logger for this CLI module.
logger = get_logger(__name__)

In [None]:
# Set the logging level to WARNING for the tests; the next cell asserts the effective level.
set_level(logging.WARNING)

In [None]:
# Testing logger settings

# The effective level must be WARNING after the `set_level` call in the previous cell.
display(logger.getEffectiveLevel())
assert logger.getEffectiveLevel() == logging.WARNING

# Emit one message per level so the recorded output shows which ones get through.
logger.debug("This is a debug message")
logger.info("This is an info")
logger.warning("This is a warning")
logger.error("This is an error")

30

[ERROR] __main__: This is an error


In [None]:
# Helper context manager for testing

_airt_service_token = None


@contextmanager
def set_airt_service_token_envvar():
    """Expose a service token through the `SERVICE_TOKEN` environment variable.

    On first use the token is obtained with `Client.get_token`, using the
    credentials stored in the `SERVICE_USERNAME` / `SERVICE_PASSWORD` environment
    variables, and cached in the module-level `_airt_service_token`. Later calls
    reuse the cached value.

    The environment variable is set for the duration of the `with` block and
    removed again on exit, whether or not the block raised.
    """
    global _airt_service_token
    if _airt_service_token is None:
        display("_airt_service_token is None, getting a token...")

        Client.get_token(
            username=os.environ[SERVICE_USERNAME],
            password=os.environ[SERVICE_PASSWORD],
        )
        _airt_service_token = Client.auth_token

    # Set the variable before entering `try`: if the assignment itself fails,
    # there is nothing to clean up and the original error must surface.
    os.environ[SERVICE_TOKEN] = _airt_service_token
    try:
        yield
    finally:
        # `pop` with a default instead of `del`: if the code under test already
        # removed the variable, `del` would raise KeyError here and mask the
        # exception that actually caused the failure.
        os.environ.pop(SERVICE_TOKEN, None)

In [None]:
# Smoke test for the context manager: display a mask with the same length as the
# token rather than the token itself, so the secret never lands in the output.
with set_airt_service_token_envvar():
    display("*" * len((os.environ[SERVICE_TOKEN])))

'_airt_service_token is None, getting a token...'

'*******************************************************************************************************************************'

In [None]:
# S3 location that `generate_ds` pulls from to create the datasource used by the tests.
TEST_S3_URI = "s3://test-airt-service/ecommerce_behavior_notebooks"
# Well-formed uuid passed to the commands in the negative tests.
RANDOM_UUID_FOR_TESTING = "00000000-0000-0000-0000-000000000000"

In [None]:
#| exporti


@app.command()
@helper.requires_auth_token
def dtypes(
    uuid: str = typer.Argument(
        ...,
        help="Datasource uuid.",
    ),
    debug: bool = typer.Option(
        False,
        "--debug",
        "-d",
        help="Set logger level to DEBUG and output everything.",
    ),
) -> None:
    """Return the dtypes of the datasource."""
    # The docstring above is shown verbatim as the command's --help text.
    # `debug` is not read in this body — presumably handled by the
    # `helper.requires_auth_token` decorator; confirm against the helper module.

    from airt.client import DataSource

    # Transpose the dtypes table and label its single column (0) as "dtype"
    # before echoing it to the terminal.
    transposed = DataSource(uuid=uuid).dtypes.T
    typer.echo(transposed.rename(columns={0: "dtype"}))

In [None]:
def assert_has_help(xs: List[str]):
    """Check that the command path `xs` has a help page that mentions it.

    Runs `app` with `xs` followed by `--help`, displays the captured output and
    asserts that the space-joined command path appears somewhere in it.
    """
    argv = xs + ["--help"]
    help_text = runner.invoke(app, argv).stdout

    display(help_text)
    expected_fragment = " ".join(xs)
    assert expected_fragment in help_text

In [None]:
# The help output for `dtypes` must mention the command name.
assert_has_help(["dtypes"])

'Usage: dtypes [OPTIONS] UUID\n\n  Return the dtypes of the datasource.\n\nArguments:\n  UUID  Datasource uuid.  [required]\n\nOptions:\n  -d, --debug                     Set logger level to DEBUG and output\n                                  everything.\n  --install-completion [bash|zsh|fish|powershell|pwsh]\n                                  Install completion for the specified shell.\n  --show-completion [bash|zsh|fish|powershell|pwsh]\n                                  Show completion for the specified shell, to\n                                  copy it or customize the installation.\n  --help                          Show this message and exit.\n'

In [None]:
# helper function to create a s3 datasource

_ds = None
@contextmanager
def generate_ds(force_create: bool = False):
    """Yield a datasource built from `TEST_S3_URI`, creating it only when needed.

    The datasource is cached in the module-level `_ds`. A new one is created when
    nothing is cached yet or when `force_create` is true; otherwise the cached
    object is yielded as is. Nothing is cleaned up when the context exits.

    Args:
        force_create: Create a fresh datablob and datasource even if one is cached.

    Yields:
        The cached (or freshly created) datasource object.
    """
    global _ds

    can_reuse_cached = _ds is not None and not force_create
    if not can_reuse_cached:
        # Pull the test data from S3 into a datablob, using the AWS credentials
        # found in the environment.
        s3_settings = dict(
            uri=TEST_S3_URI,
            access_key=os.environ["AWS_ACCESS_KEY_ID"],
            secret_key=os.environ["AWS_SECRET_ACCESS_KEY"],
            cloud_provider="aws",
            region="eu-west-1",
        )
        db = DataBlob.from_s3(**s3_settings)

        db.progress_bar()
        display(f"{db.uuid=}")
        # The uuid must be 32 characters long once the dashes are removed.
        assert len(db.uuid.replace("-", "")) == 32

        # Turn the datablob into a datasource and cache it for later test cells.
        _ds = db.to_datasource(
            file_type="parquet", index_column="user_id", sort_by="event_time"
        )

        display(f"{_ds.uuid=}")
        assert len(_ds.uuid.replace("-", "")) == 32

        _ds.progress_bar()

    yield _ds

In [None]:
#| include: false

# tests for dtypes
# Testing positive scenario

with set_airt_service_token_envvar():
    with generate_ds() as ds:
        ds_uuid = ds.uuid

        # No command name is passed here: `dtypes` is the only command registered
        # on `app` when this cell runs (the recorded usage line reads
        # "Usage: dtypes [OPTIONS] UUID").
        # NOTE(review): this depends on cell execution order — confirm it still
        # holds if the cell is re-run after later commands are registered.
        result = runner.invoke(app, [ds_uuid])

        display(result.stdout)

        assert result.exit_code == 0, f"{result.exit_code=} {result.stdout=}"
        # Nine lines in the recorded output: one header line plus eight columns.
        assert len(result.stdout[:-1].split("\n")) == 9, result.stdout[:-1].split("\n")

100%|██████████| 1/1 [00:15<00:00, 15.18s/it]


"db.uuid='28db0282-5972-4f71-8fc8-e704e745817d'"

"_ds.uuid='85e61f59-ebb2-4f97-b7b5-64dfb7461c8d'"

100%|██████████| 1/1 [00:30<00:00, 30.32s/it]


'                             dtype\nevent_time     datetime64[ns, UTC]\nevent_type                  object\nproduct_id                   int64\ncategory_id                  int64\ncategory_code               object\nbrand                       object\nprice                      float64\nuser_session                object\n'

In [None]:
#| exporti


@app.command()
@helper.display_formated_table
@helper.requires_auth_token
def head(
    uuid: str = typer.Argument(
        ...,
        help="Datasource uuid.",
    ),
    format: Optional[str] = typer.Option(
        None,
        "--format",
        "-f",
        help="Format output and show only the given column(s) values.",
    ),
    debug: bool = typer.Option(
        False,
        "--debug",
        "-d",
        help="Set logger level to DEBUG and output everything.",
    ),
) -> Dict["str", Union[pd.DataFrame, str]]:
    """Return the first few rows of the datasource."""
    # The docstring above is shown verbatim as the command's --help text.
    # `format` and `debug` are not read in this body — presumably consumed by the
    # helper decorators; confirm against the helper module.

    from airt.client import DataSource

    # The result is handed back under the "df" key for the
    # `helper.display_formated_table` decorator.
    return {"df": DataSource(uuid=uuid).head()}

In [None]:
#| include: false

# The help output for `head` must mention the command name.
assert_has_help(["head"])

'Usage: root head [OPTIONS] UUID\n\n  Return the first few rows of the datasource.\n\nArguments:\n  UUID  Datasource uuid.  [required]\n\nOptions:\n  -f, --format TEXT  Format output and show only the given column(s) values.\n  -d, --debug        Set logger level to DEBUG and output everything.\n  --help             Show this message and exit.\n'

In [None]:
# tests for head
# Testing positive scenario

with set_airt_service_token_envvar():
    with generate_ds() as ds:
        ds_uuid = ds.uuid
    
        # Show only two columns; `product_id` is rendered with thousands separators.
        format_str = "{'product_id': '{:,d}', 'event_time': '{}'}"
        result = runner.invoke(app, ["head", ds_uuid, "--format", format_str])

        display(result.stdout)

        assert result.exit_code == 0
        # Eleven lines in the recorded output: one header line plus ten data rows.
        assert len(result.stdout[:-1].split("\n")) == 11

'product_id    event_time\n1,304,297     2019-11-03 14:26:26+00:00\n1,306,310     2019-11-03 14:26:38+00:00\n1,306,310     2019-11-04 05:56:10+00:00\n1,306,265     2019-11-01 02:23:03+00:00\n1,306,952     2019-11-06 15:23:02+00:00\n1,306,952     2019-11-06 15:23:43+00:00\n1,307,053     2019-11-06 15:23:55+00:00\n1,307,345     2019-11-02 08:09:20+00:00\n1,306,609     2019-11-02 08:10:59+00:00\n1,307,354     2019-11-02 08:14:46+00:00\n'

In [None]:
#| exporti


@app.command()
@helper.display_formated_table
@helper.requires_auth_token
def ls(
    offset: int = typer.Option(
        0,
        "--offset",
        "-o",
        help="The number of datasources to offset at the beginning. If **None**, then the default value **0** will be used.",
    ),
    limit: int = typer.Option(
        100,
        "--limit",
        "-l",
        help="The maximum number of datasources to return from the server. If **None**, then the default value **100** will be used.",
    ),
    disabled: bool = typer.Option(
        False,
        "--disabled",
        # Each fragment ends with a space so the concatenated sentence keeps its word breaks.
        help=(
            "If set to **True**, then only the deleted datasources will be returned. "
            "Else, the default value **False** will be used to return only the list "
            "of active datasources."
        ),
    ),
    completed: bool = typer.Option(
        False,
        "--completed",
        help=(
            "If set to **True**, then only the datasources that are successfully downloaded "
            "to the server will be returned. Else, the default value **False** will be used to "
            "return all the datasources."
        ),
    ),
    format: Optional[str] = typer.Option(
        None,
        "--format",
        "-f",
        help="Format output and show only the given column(s) values.",
    ),
    quiet: bool = typer.Option(
        False,
        "--quiet",
        "-q",
        help="Output only datasource uuids, one per line.",
    ),
    debug: bool = typer.Option(
        False,
        "--debug",
        "-d",
        help="Set logger level to DEBUG and output everything.",
    ),
) -> Dict["str", Union[pd.DataFrame, str]]:
    """Return the list of datasources."""
    # The docstring above is shown verbatim as the command's --help text.
    # `format`, `quiet` and `debug` are not read in this body — presumably
    # consumed by the helper decorators; confirm against the helper module.

    from airt.client import DataSource

    dsx = DataSource.ls(
        offset=offset, limit=limit, disabled=disabled, completed=completed
    )
    df = DataSource.as_df(dsx)

    # Replace raw values with human-readable ones for display.
    df["pulled_on"] = helper.humanize_date(df["pulled_on"])
    df["no_of_rows"] = helper.humanize_number(df["no_of_rows"])
    df["folder_size"] = helper.humanize_size(df["folder_size"])

    # NOTE(review): the key is spelled "quite_column_name" on purpose — the same
    # spelling is used by `rm`, so it is presumably what the
    # `display_formated_table` decorator looks up. Do not "fix" it here alone.
    return {"df": df, "quite_column_name": "datasource_uuid"}

In [None]:
# The help output for `ls` must mention the command name.
assert_has_help(["ls"])

'Usage: root ls [OPTIONS]\n\n  Return the list of datasources.\n\nOptions:\n  -o, --offset INTEGER  The number of datasources to offset at the beginning. If\n                        **None**, then the default value **0** will be used.\n                        [default: 0]\n  -l, --limit INTEGER   The maximum number of datasources to return from the\n                        server. If **None**, then the default value **100** will\n                        be used.  [default: 100]\n  --disabled            If set to **True**, then only the deleted datasources\n                        will be returned.Else, the default value **False** will\n                        be used to return only the listof active datasources.\n  --completed           If set to **True**, then only the datasources that are\n                        successfully downloadedto the server will be returned.\n                        Else, the default value **False** will be used toreturn\n                        all the data

In [None]:
# Tests for datasource_ls
# Testing positive scenario. Saving the token in env variable

def get_ids_from_result(result) -> List[str]:
    """Split the quiet-mode output of a CLI invocation into a list of uuids.

    Args:
        result: Object with a `stdout` string attribute (e.g. the value returned
            by `runner.invoke`), holding one uuid per line.

    Returns:
        The non-empty lines of `result.stdout`, in order. Empty output gives an
        empty list.
    """
    # `splitlines()` copes with a missing trailing newline (slicing off the last
    # character would truncate the final uuid in that case); empty lines are
    # dropped so empty output does not produce a bogus "" entry.
    return [line for line in result.stdout.splitlines() if line]


with set_airt_service_token_envvar():
    with generate_ds() as ds:

        # Without quiet: the full table is printed, including the "ready" column
        result = runner.invoke(app, ["ls"])
        display(result.stdout)

        assert "ready" in result.stdout, result.stdout
        assert result.exit_code == 0

        # Without quiet, restricting the output to two columns via --format
        format_str = "{'datasource_uuid': '{}', 'ready': '{}'}"
        result = runner.invoke(app, ["ls", "--format", format_str])
        display(result.stdout)

        assert result.exit_code == 0

        # With quiet: only the datasource uuids are printed
        result = runner.invoke(app, ["ls", "-q"])
        display(result.stdout)

        assert result.exit_code == 0
        uuids = get_ids_from_result(result)
        display(f"{uuids=}")

'datasource_uuid                       datablob_uuid                         region     cloud_provider    tags    pulled_on       no_of_rows    folder_size    ready\n672146ec-103a-4b4c-8038-139597771127  ceab7670-02b2-4654-9946-26c86ea50d4e  eu-west-1  aws               latest  6 minutes ago   294,599       unknown        False\n85e61f59-ebb2-4f97-b7b5-64dfb7461c8d  28db0282-5972-4f71-8fc8-e704e745817d  eu-west-1  aws               latest  25 seconds ago  294,599       6.2 MB         True\n'

'datasource_uuid                       ready\n672146ec-103a-4b4c-8038-139597771127  False\n85e61f59-ebb2-4f97-b7b5-64dfb7461c8d  True\n'

'672146ec-103a-4b4c-8038-139597771127\n85e61f59-ebb2-4f97-b7b5-64dfb7461c8d\n'

"uuids=['672146ec-103a-4b4c-8038-139597771127', '85e61f59-ebb2-4f97-b7b5-64dfb7461c8d']"

In [None]:
#| include: false

# Tests for datasource_ls
# Testing positive scenario.
# Testing by passing different values for limit


with set_airt_service_token_envvar():

    for limit in [1, 10, 1000]:
        offset = 1
        # Command-line arguments are strings; convert the numbers explicitly
        # instead of relying on the parser tolerating ints in the args list.
        result = runner.invoke(
            app, ["ls", "--offset", str(offset), "--limit", str(limit), "-q"]
        )

        assert result.exit_code == 0

        uuids = get_ids_from_result(result)
        display(f"{uuids=}")
        # The server must never return more entries than requested.
        assert limit >= len(uuids) >= 0

"uuids=['85e61f59-ebb2-4f97-b7b5-64dfb7461c8d']"

"uuids=['85e61f59-ebb2-4f97-b7b5-64dfb7461c8d']"

"uuids=['85e61f59-ebb2-4f97-b7b5-64dfb7461c8d']"

In [None]:
#| include: false

# Tests for datasource_ls
# Testing positive scenario.
# Testing by passing large value for offset.

with set_airt_service_token_envvar():

    limit = 10
    offset = 1_000_000
    # Command-line arguments are strings; convert the numbers explicitly
    # instead of relying on the parser tolerating ints in the args list.
    result = runner.invoke(
        app, ["ls", "--offset", str(offset), "--limit", str(limit)]
    )

    # An offset past the end of the list is not an error.
    assert result.exit_code == 0

    display(result.stdout)

'datasource_uuid    datablob_uuid    region    cloud_provider    tags    pulled_on    no_of_rows    folder_size    ready\n'

In [None]:
#| exporti


@app.command()
@helper.requires_auth_token
def train(
    uuid: str = typer.Option(
        ...,
        "--datasource_uuid",
        "-uuid",
        help="Datasource uuid.",
    ),
    client_column: str = typer.Option(
        ...,
        "--client_column",
        help="The column name that uniquely identifies the users/clients.",
    ),
    timestamp_column: Optional[str] = typer.Option(
        None,
        "--timestamp_column",
        help="The timestamp column indicating the time of an event. If not passed, then the default value **None** will be used. ",
    ),
    target_column: str = typer.Option(
        ...,
        "--target_column",
        help="Target column name that indicates the type of the event.",
    ),
    target: str = typer.Option(
        ...,
        "--target",
        help="Target event name to train and make predictions. You can pass the target event as a string or as a " \
            "regular expression for predicting more than one event. For example, passing ***checkout** will " \
            "train a model to predict any checkout event."
    ),
    predict_after: str = typer.Option(
        ...,
        "--predict_after",
        help="Time delta in hours of the expected target event.",
    ),
    quiet: bool = typer.Option(
        False,
        "--quiet",
        "-q",
        help="Output model uuid only.",
    ),
    debug: bool = typer.Option(
        False,
        "--debug",
        "-d",
        help="Set logger level to DEBUG and output everything.",
    ),
):
    """Train a model against the datasource."""
    # The docstring above is shown verbatim as the command's --help text.
    # `debug` is not read in this body — presumably handled by the
    # `helper.requires_auth_token` decorator; confirm against the helper module.

    from datetime import timedelta
    from airt.client import DataSource

    ds = DataSource(uuid=uuid)

    # `predict_after` arrives as text and is interpreted as a whole number of hours.
    training_options = dict(
        client_column=client_column,
        target_column=target_column,
        target=target,
        predict_after=timedelta(hours=int(predict_after)),
        timestamp_column=timestamp_column,
    )
    model = ds.train(**training_options)

    if not quiet:
        # Verbose mode: announce the model uuid first, then show the progress bar.
        typer.echo(f"Training started for model uuid: {model.uuid}")
        model.progress_bar()
        return

    # Quiet mode: call `model.wait()` first, then print nothing but the model uuid.
    model.wait()
    typer.echo(f"{model.uuid}")

In [None]:
#| include: false

# The help output for `train` must mention the command name.
assert_has_help(["train"])

'Usage: root train [OPTIONS]\n\n  Train a model against the datasource.\n\nOptions:\n  -uuid, --datasource_uuid TEXT  Datasource uuid.  [required]\n  --client_column TEXT           The column name that uniquely identifies the\n                                 users/clients.  [required]\n  --timestamp_column TEXT        The timestamp column indicating the time of an\n                                 event. If not passed, then the default value\n                                 **None** will be used.\n  --target_column TEXT           Target column name that indicates the type of\n                                 the event.  [required]\n  --target TEXT                  Target event name to train and make\n                                 predictions. You can pass the target event as a\n                                 string or as a regular expression for\n                                 predicting more than one event. For example,\n                                 passing ***checkout** 

In [None]:
# tests for train
# Testing positive scenario with quiet

with set_airt_service_token_envvar():
    with generate_ds() as ds:
        ds_uuid = ds.uuid

        result = runner.invoke(
            app,
            [
                "train",
                "--datasource_uuid",
                ds_uuid,
                "--client_column",
                "user_id",
                "--target_column",
                "category_code",
                "--target",
                "*checkout",
                "--predict_after",
                "3",
                "-q",
            ],
        )

        display(result.stdout)

        assert result.exit_code == 0
        # In quiet mode the output is just the model uuid: 32 characters once the
        # dashes and the newline are stripped.
        assert len (result.stdout.replace('-', '').replace('\n', '')) == 32

'5315e186-b9b5-4746-8208-1c3359165e55\n'

In [None]:
# tests for train
# Testing positive scenario without quiet (uses the short `-uuid` option name)

with set_airt_service_token_envvar():
    with generate_ds() as ds:
        ds_uuid = ds.uuid

        result = runner.invoke(
            app,
            [
                "train",
                "-uuid",
                ds_uuid,
                "--client_column",
                "user_id",
                "--target_column",
                "category_code",
                "--target",
                "*checkout",
                "--predict_after",
                "3",
            ],
        )

        display(result.stdout)

        assert result.exit_code == 0
        # Without -q the command announces the model uuid before the progress bar.
        assert "Training started for model uuid" in result.stdout

'Training started for model uuid: ed5ec4b6-5729-471d-bef7-76c7cf147653\n\r  0%|          | 0/5 [00:00<?, ?it/s]\r100%|██████████| 5/5 [00:00<00:00, 123.81it/s]\n'

In [None]:
# tests for train
# Testing negative scenario with quiet. Passing an invalid datasource uuid

with set_airt_service_token_envvar():

    result = runner.invoke(
        app,
        [
            "train",
            "--datasource_uuid",
            RANDOM_UUID_FOR_TESTING,
            "--client_column",
            "PersonId",
            "--target_column",
            "DefinitionId",
            "--target",
            "*checkout",
            "--predict_after",
            "3",
            "-q",
        ],
    )

    display(result.stdout)

    # An unknown datasource uuid must make the command exit with status 1.
    assert result.exit_code == 1

'Error: The datasource uuid is incorrect. Please try again.\n'

In [None]:
#| exporti


@app.command()
@helper.display_formated_table
@helper.requires_auth_token
def details(
    uuid: str = typer.Argument(
        ...,
        help="Datasource uuid.",
    ),
    format: Optional[str] = typer.Option(
        None,
        "--format",
        "-f",
        help="Format output and show only the given column(s) values.",
    ),
    debug: bool = typer.Option(
        False,
        "--debug",
        "-d",
        help="Set logger level to DEBUG and output everything.",
    ),
) -> Dict["str", Union[pd.DataFrame, str]]:
    """Return details of a datasource."""
    # The docstring above is shown verbatim as the command's --help text.
    # `format` and `debug` are not read in this body — presumably consumed by the
    # helper decorators; confirm against the helper module.

    from airt.client import DataSource

    df = DataSource(uuid=uuid).details()

    # Replace raw values with human-readable ones, column by column.
    humanizers = {
        "pulled_on": helper.humanize_date,
        "no_of_rows": helper.humanize_number,
        "folder_size": helper.humanize_size,
    }
    for column, humanize in humanizers.items():
        df[column] = humanize(df[column])

    return {"df": df}

In [None]:
#| include: false

# The help output for `details` must mention the command name.
assert_has_help(["details"])

'Usage: root details [OPTIONS] UUID\n\n  Return details of a datasource.\n\nArguments:\n  UUID  Datasource uuid.  [required]\n\nOptions:\n  -f, --format TEXT  Format output and show only the given column(s) values.\n  -d, --debug        Set logger level to DEBUG and output everything.\n  --help             Show this message and exit.\n'

In [None]:
#| include: false

# Tests for details
# Testing positive scenario

# Helper function to extract ID

def extract_id(res) -> str:
    """Return the first space-delimited token on the second line of `res`.

    `res` is the printed table: line 0 is skipped and the leading token of line 1
    (after stripping surrounding whitespace) is returned. Raises IndexError when
    `res` has fewer than two lines.
    """
    second_line = res.split("\n")[1]
    first_token, _, _ = second_line.strip().partition(" ")
    return first_token

with set_airt_service_token_envvar():
    with generate_ds() as ds:
        ds_uuid = ds.uuid

        # Getting Details of the data source
        format_str = "{'datasource_uuid': '{}', 'ready': '{}'}"
        result = runner.invoke(app, ["details", ds_uuid, "--format", format_str])
        # The uuid is the first token of the first data row (the line after the header).
        result_id = extract_id(result.stdout)

        display(result.stdout)

        assert result.exit_code == 0
        assert result_id == ds_uuid

'datasource_uuid                       ready\n85e61f59-ebb2-4f97-b7b5-64dfb7461c8d  True\n'

In [None]:
#| include: false

# Tests for details
# Testing negative scenario. Passing an invalid datasource uuid

with set_airt_service_token_envvar():

    result = runner.invoke(app, ["details", RANDOM_UUID_FOR_TESTING])

    display(result.stdout)

    # An unknown datasource uuid must make the command exit with status 1.
    assert result.exit_code == 1

'Error: The datasource uuid is incorrect. Please try again.\n'

In [None]:
#| exporti


@app.command()
@helper.display_formated_table
@helper.requires_auth_token
def rm(
    uuid: str = typer.Argument(
        ...,
        help="Datasource uuid.",
    ),
    format: Optional[str] = typer.Option(
        None,
        "--format",
        "-f",
        help="Format output and show only the given column(s) values.",
    ),
    quiet: bool = typer.Option(
        False,
        "--quiet",
        "-q",
        help="Output the deleted datasource uuid only.",
    ),
    debug: bool = typer.Option(
        False,
        "--debug",
        "-d",
        help="Set logger level to DEBUG and output everything.",
    ),
) -> Dict["str", Union[pd.DataFrame, str]]:
    """Delete a datasource from the server."""
    # The docstring above is shown verbatim as the command's --help text.
    # `format`, `quiet` and `debug` are not read in this body — presumably
    # consumed by the helper decorators; confirm against the helper module.

    from airt.client import DataSource

    df = DataSource(uuid=uuid).delete()

    # Replace raw values with human-readable ones, column by column.
    humanizers = {
        "pulled_on": helper.humanize_date,
        "no_of_rows": helper.humanize_number,
        "folder_size": helper.humanize_size,
    }
    for column, humanize in humanizers.items():
        df[column] = humanize(df[column])

    # NOTE(review): the "quite_column_name" spelling is kept as is — the same key
    # is returned by `ls`, so it is presumably what the decorator looks up.
    return {"df": df, "quite_column_name": "datasource_uuid"}

In [None]:
#| include: false

# The help output for `rm` must mention the command name.
assert_has_help(["rm"])

'Usage: root rm [OPTIONS] UUID\n\n  Delete a datasource from the server.\n\nArguments:\n  UUID  Datasource uuid.  [required]\n\nOptions:\n  -f, --format TEXT  Format output and show only the given column(s) values.\n  -q, --quiet        Output the deleted datasource uuid only.\n  -d, --debug        Set logger level to DEBUG and output everything.\n  --help             Show this message and exit.\n'

In [None]:
#| include: false

# Tests for datasource rm
# Testing positive scenario, restricting the output to the uuid column via --format

with set_airt_service_token_envvar():
    with generate_ds() as ds:
        ds_uuid = ds.uuid

        # Deleting the created data source from the server
        format_str = "{'datasource_uuid': '{}'}"
        result = runner.invoke(app, ["rm", ds_uuid, "--format", format_str])
        # Drop the trailing newline; the recorded output is the bare uuid.
        deleted_uuid = result.stdout[:-1]

        display(deleted_uuid)

        assert result.exit_code == 0
        assert deleted_uuid == ds_uuid

        # List the existing data source ids in server and make sure the deleted id is not present in the server
        ls_result = runner.invoke(app, ["ls", "-q"])
        ls_uuids = get_ids_from_result(ls_result)

        display(ls_uuids)
        assert deleted_uuid not in ls_uuids
        
        # ls with quiet and disabled = True: the deleted datasource must be listed there
        result = runner.invoke(app, ["ls", "--disabled", "-q"])

        display(result.stdout)
        assert result.exit_code == 0

        disabled_ds_uuids = get_ids_from_result(result)

        display(f"{disabled_ds_uuids=}")
        assert deleted_uuid in disabled_ds_uuids

        # Testing negative scenario. Deleting already deleted data source
        # NOTE(review): the result of this invocation is never checked — consider
        # asserting on the exit code once the expected value is confirmed.
        result = runner.invoke(app, ["rm", deleted_uuid, "-q"])

        # Testing negative scenario. Getting the details of the deleted data source
        # NOTE(review): the result of this invocation is never checked either.
        result = runner.invoke(app, ["details", deleted_uuid])

'85e61f59-ebb2-4f97-b7b5-64dfb7461c8d'

['672146ec-103a-4b4c-8038-139597771127']

'6192543a-ce6e-4b1c-8ba4-626060904b55\n85e61f59-ebb2-4f97-b7b5-64dfb7461c8d\n'

"disabled_ds_uuids=['6192543a-ce6e-4b1c-8ba4-626060904b55', '85e61f59-ebb2-4f97-b7b5-64dfb7461c8d']"

In [None]:
# Tests for datasource rm
# Testing negative scenario. Deleting invalid data source

with set_airt_service_token_envvar():
    # Deleting a datasource uuid that is not known to the server
    result = runner.invoke(app, ["rm", RANDOM_UUID_FOR_TESTING, "-q"])

    display(result.stdout)

    # The command must fail, as the equivalent negative tests for `train` and
    # `details` already assert; without this check the cell verified nothing.
    assert result.exit_code == 1, f"{result.exit_code=} {result.stdout=}"

'Error: The datasource uuid is incorrect. Please try again.\n'

In [None]:
#| exporti


@app.command()
@helper.display_formated_table
@helper.requires_auth_token
def tag(
    uuid: str = typer.Option(
        ...,
        "--datasource_uuid",
        "-uuid",
        help="Datasource uuid.",
    ),
    name: str = typer.Option(
        ...,
        "--name",
        "-n",
        help="A string to tag the datasource.",
    ),
    format: Optional[str] = typer.Option(
        None,
        "--format",
        "-f",
        help="Format output and show only the given column(s) values.",
    ),
    debug: bool = typer.Option(
        False,
        "--debug",
        "-d",
        help="Set logger level to DEBUG and output everything.",
    ),
) -> Dict["str", Union[pd.DataFrame, str]]:
    """Tag an existing datasource in server."""
    # The docstring above is shown verbatim as the command's --help text.
    # `format` and `debug` are not read in this body — presumably consumed by the
    # helper decorators; confirm against the helper module.

    from airt.client import DataSource

    df = DataSource(uuid=uuid).tag(name=name)

    # Replace raw values with human-readable ones, column by column.
    humanizers = {
        "pulled_on": helper.humanize_date,
        "no_of_rows": helper.humanize_number,
        "folder_size": helper.humanize_size,
    }
    for column, humanize in humanizers.items():
        df[column] = humanize(df[column])

    return {"df": df}

In [None]:
#| include: false

# The help output for `tag` must mention the command name.
assert_has_help(["tag"])

'Usage: root tag [OPTIONS]\n\n  Tag an existing datasource in server.\n\nOptions:\n  -uuid, --datasource_uuid TEXT  Datasource uuid.  [required]\n  -n, --name TEXT                A string to tag the datasource.  [required]\n  -f, --format TEXT              Format output and show only the given column(s)\n                                 values.\n  -d, --debug                    Set logger level to DEBUG and output\n                                 everything.\n  --help                         Show this message and exit.\n'

In [None]:
#| include: false

# Tests for tag
# Testing positive scenario

with set_airt_service_token_envvar():
    # force_create=True builds a fresh datasource instead of reusing the cached one.
    with generate_ds(force_create=True) as ds:
        ds_uuid = ds.uuid

        # Tag the data source, showing only the uuid and tags columns
        format_str = "{'datasource_uuid': '{}', 'tags': '{}'}"
        result = runner.invoke(app, ["tag", "-uuid", ds_uuid, "-n", "v1.1.0", "--format", format_str])

        display(result.stdout)

        assert result.exit_code == 0
        assert "v1.1.0" in str(result.stdout)


        # Apply the same tag again, this time without --format (full table output)
        result = runner.invoke(app, ["tag", "-uuid", ds_uuid, "-n", "v1.1.0"])

        display(result.stdout)

        assert result.exit_code == 0
        assert "v1.1.0" in str(result.stdout)

100%|██████████| 1/1 [00:15<00:00, 15.19s/it]


"db.uuid='03727d6d-eaf4-4503-ace7-1d2d22f60cbe'"

"_ds.uuid='690f2af8-2d51-4dcd-8661-11f38a478585'"

100%|██████████| 1/1 [00:30<00:00, 30.29s/it]


'datasource_uuid                       tags\n690f2af8-2d51-4dcd-8661-11f38a478585  latest, v1.1.0\n'

'datasource_uuid                       datablob_uuid                         region     cloud_provider    tags            pulled_on       no_of_rows    folder_size    ready\n690f2af8-2d51-4dcd-8661-11f38a478585  03727d6d-eaf4-4503-ace7-1d2d22f60cbe  eu-west-1  aws               latest, v1.1.0  24 seconds ago  294,599       6.2 MB         True\n'