In [None]:
#| default_exp cli.db

In [None]:
from airt.testing import activate_by_import

[INFO] airt.testing.activate_by_import: Testing environment activated.


In [None]:
#| export

from typing import *

In [None]:
#| exporti

import os
import json

import typer
from typer import echo
from tabulate import tabulate
import datetime as dt
import pandas as pd

from airt.client import Client
from airt.cli import helper
from airt.logger import get_logger, set_level
from airt.constant import CLIENT_DB_USERNAME, CLIENT_DB_PASSWORD

In [None]:
import tempfile
import shutil
from pathlib import Path
from urllib.parse import quote_plus as urlquote
from contextlib import contextmanager

import logging
import pytest

from typer.testing import CliRunner
from sqlmodel import create_engine
from azure.identity import DefaultAzureCredential
from azure.mgmt.storage import StorageManagementClient

import airt.sanitizer
from airt.constant import SERVICE_TOKEN, SERVER_URL, SERVICE_USERNAME, SERVICE_PASSWORD, CLIENT_NAME, CLIENT_DB_USERNAME, CLIENT_DB_PASSWORD
from airt.client import DataBlob

In [None]:
#| exporti

app = typer.Typer(
    help="""A set of commands for importing and processing data from sources such as CSV/parquet files, databases, AWS S3 buckets, and Azure Blob Storage."""
)

In [None]:
runner = CliRunner()

In [None]:
#| export

logger = get_logger(__name__)

In [None]:
set_level(logging.WARNING)

In [None]:
# Testing logger settings

display(logger.getEffectiveLevel())
assert logger.getEffectiveLevel() == logging.WARNING

logger.debug("This is a debug message")
logger.info("This is an info")
logger.warning("This is a warning")
logger.error("This is an error")

30

[ERROR] __main__: This is an error


In [None]:
# Helper context manager for testing

_airt_service_token = None


@contextmanager
def set_airt_service_token_envvar():
    """Temporarily expose a service token through the ``SERVICE_TOKEN`` environment variable.

    On first use, a token is requested with ``Client.get_token`` using the
    credentials stored in the ``SERVICE_USERNAME`` / ``SERVICE_PASSWORD``
    environment variables and cached in the module-level
    ``_airt_service_token``; later uses reuse the cached token.

    On exit the environment variable is put back exactly as it was found:
    restored to its previous value if it was set, removed otherwise.

    Raises:
        KeyError: If the username or password environment variable is missing.
    """
    global _airt_service_token
    if _airt_service_token is None:
        display("_airt_service_token is None, getting a token...")

        username = os.environ[SERVICE_USERNAME]
        password = os.environ[SERVICE_PASSWORD]

        Client.get_token(username=username, password=password)
        _airt_service_token = Client.auth_token

    # Remember any pre-existing value so that leaving the block does not
    # silently wipe a token that the caller had already exported.
    previous_token = os.environ.get(SERVICE_TOKEN)
    os.environ[SERVICE_TOKEN] = _airt_service_token

    try:
        yield
    finally:
        if previous_token is None:
            # pop() instead of del: the body may already have removed the variable.
            os.environ.pop(SERVICE_TOKEN, None)
        else:
            os.environ[SERVICE_TOKEN] = previous_token

In [None]:
with set_airt_service_token_envvar():
    display("*" * len((os.environ[SERVICE_TOKEN])))

'_airt_service_token is None, getting a token...'

'*******************************************************************************************************************************'

In [None]:
def assert_has_help(xs: List[str]):
    """Invoke the CLI with ``xs`` followed by ``--help`` and check the help output.

    Args:
        xs: Command path (e.g. ``["from-s3"]``) whose help page is requested.

    The help text is displayed and must contain the space-joined command path.
    """
    help_args = [*xs, "--help"]
    help_text = runner.invoke(app, help_args).stdout

    display(help_text)
    expected_command = " ".join(xs)
    assert expected_command in help_text

In [None]:
TEST_S3_URI = "s3://test-airt-service/ecommerce_behavior_notebooks"
TEST_S3_CSV_URI = "s3://test-airt-service/ecommerce_behavior_csv"
TEST_AZURE_URI = "https://testairtservice.blob.core.windows.net/test-container/ecommerce_behavior_notebooks"
RANDOM_UUID_FOR_TESTING = "00000000-0000-0000-0000-000000000000" 

In [None]:
def remove_hypens_from_id(id: str) -> str:
    """Return ``id`` with every ``-`` character removed."""
    return id.replace("-", "")

In [None]:
actual = remove_hypens_from_id(RANDOM_UUID_FOR_TESTING)
assert len(actual) == 32
actual

'00000000000000000000000000000000'

In [None]:
# helper function to create a datablob


# Module-level cache so the datablob is pulled once and shared between test cells.
_db = None


@contextmanager
def generate_db(force_create: bool = False):
    """Yield a datablob pulled from ``TEST_S3_URI``, creating it only when needed.

    Args:
        force_create: When True, pull a fresh datablob even if one is cached.

    Yields:
        The cached (or newly created) datablob object.
    """
    global _db

    must_pull = force_create or _db is None
    if must_pull:
        aws_access_key = os.environ["AWS_ACCESS_KEY_ID"]
        aws_secret_key = os.environ["AWS_SECRET_ACCESS_KEY"]

        _db = DataBlob.from_s3(
            uri=TEST_S3_URI,
            access_key=aws_access_key,
            secret_key=aws_secret_key,
        )

        display(f"{_db.uuid=}")
        # Sanity check: a uuid holds 32 characters once the hyphens are stripped.
        uuid_length = len(remove_hypens_from_id(_db.uuid))
        assert uuid_length == 32

        _db.progress_bar()

    yield _db

In [None]:
#| exporti


@app.command()
@helper.display_formated_table
@helper.requires_auth_token
def details(
    uuid: str = typer.Argument(
        ...,
        help="Datablob uuid.",
    ),
    format: Optional[str] = typer.Option(
        None,
        "--format",
        "-f",
        help="Format output and show only the given column(s) values."
    ),
    debug: bool = typer.Option(
        False,
        "--debug",
        "-d",
        help="Set logger level to DEBUG and output everything.",
    ),
) -> Dict["str", Union[pd.DataFrame, str]]:
    """Return details of a datablob."""
    # NOTE: the docstring above is shown verbatim as the command's --help text.
    # `format` and `debug` are not read in this body; presumably the helper
    # decorators consume them - confirm in airt.cli.helper.

    # Imported inside the command body rather than at module level.
    from airt.client import DataBlob

    details_df = DataBlob(uuid=uuid).details()

    # Swap the raw timestamp / byte count for their humanized renderings.
    humanized_pulled_on = helper.humanize_date(details_df['pulled_on'])
    details_df['pulled_on'] = humanized_pulled_on
    humanized_folder_size = helper.humanize_size(details_df['folder_size'])
    details_df['folder_size'] = humanized_folder_size

    return {"df": details_df}

In [None]:
#| include: false

assert_has_help(["details"])

'Usage: details [OPTIONS] UUID\n\n  Return details of a datablob.\n\nArguments:\n  UUID  Datablob uuid.  [required]\n\nOptions:\n  -f, --format TEXT               Format output and show only the given\n                                  column(s) values.\n  -d, --debug                     Set logger level to DEBUG and output\n                                  everything.\n  --install-completion [bash|zsh|fish|powershell|pwsh]\n                                  Install completion for the specified shell.\n  --show-completion [bash|zsh|fish|powershell|pwsh]\n                                  Show completion for the specified shell, to\n                                  copy it or customize the installation.\n  --help                          Show this message and exit.\n'

In [None]:
# Tests for details
# Testing positive scenario

# Helper function to extract ID

def extract_id(res) -> str:
    """Return the first space-delimited token on the second line of ``res``.

    Raises:
        IndexError: If ``res`` contains fewer than two lines.
    """
    second_line = res.split("\n")[1].strip()
    first_token, _, _ = second_line.partition(" ")
    return first_token

# Positive scenario: `details` with a --format string must print only the datablob uuid.
with set_airt_service_token_envvar():
    with generate_db() as db:
        db_uuid = db.uuid

        # Getting Details of the data source
        format_str = "{'datablob_uuid': '{}'}"
        # NOTE: no "details" sub-command name is passed here. When this cell runs,
        # `details` is the only command registered on `app`, and the recorded help
        # output ("Usage: details [OPTIONS] UUID") shows Typer invoking it directly.
        # Re-running this cell after the later commands are registered presumably
        # requires prefixing the arguments with "details" - verify.
        result = runner.invoke(app, [db_uuid, "--format", format_str])

        display(result.stdout)

        assert result.exit_code == 0
        # The assertion message echoes both values to ease debugging of a mismatch.
        assert result.stdout == f"{db_uuid}\n", f"{result.stdout=} {db_uuid=}"

"_db.uuid='5f4579a6-4fab-4f5e-9363-41abea96dd2e'"

100%|██████████| 1/1 [00:20<00:00, 20.24s/it]


'5f4579a6-4fab-4f5e-9363-41abea96dd2e\n'

In [None]:
#| include: false

# Tests for details
# Testing negative scenario. Passing invalid data_id

# Negative scenario: a uuid that does not belong to any datablob must make the command fail.
with set_airt_service_token_envvar():

    data_uuid = RANDOM_UUID_FOR_TESTING
    # NOTE: the sub-command name is omitted because `details` is the only command
    # registered on `app` when this cell runs; running it out of order presumably
    # requires passing "details" first - verify.
    result = runner.invoke(app, [data_uuid])

    display(result.stdout)

    # The command is expected to exit with a non-zero status.
    assert result.exit_code == 1

'Error: The datablob uuid is incorrect. Please try again.\n'

In [None]:
#| exporti


@app.command("from-s3")
@helper.requires_auth_token
def from_s3(
    uri: str = typer.Argument(
        ..., help="The AWS S3 bucket uri."
    ),
    access_key: Optional[str] = typer.Option(
        None,
        help="Access key for the S3 bucket. If **None** (default value), then the value from **AWS_ACCESS_KEY_ID** environment variable is used.",
    ),
    secret_key: Optional[str] = typer.Option(
        None,
        help="Secret key for the S3 bucket. If **None** (default value), then the value from **AWS_SECRET_ACCESS_KEY** environment variable is used.",
    ),
    cloud_provider: Optional[str] = typer.Option(
        None,
        "--cloud-provider",
        "-cp",
        help="The destination cloud storage provider's name to store the datablob. Currently, the API only supports **aws** and **azure** as cloud storage providers. If **None** (default value), then **aws**  will be used as the cloud storage provider.",
    ),
    region: Optional[str] = typer.Option(
        None,
        "--region",
        "-r",
        help="The destination cloud provider's region to save your datablob. If **None** (default value) then the default region will be assigned based on the cloud provider. " \
            "In the case of **aws**, the datablob's source bucket region will be used and in the case of **azure**, **westeurope** will be used. " \
            "The supported AWS regions are: ap-northeast-1, ap-northeast-2, ap-south-1, ap-southeast-1, ap-southeast-2, ca-central-1, eu-central-1, " \
            "eu-north-1, eu-west-1, eu-west-2, eu-west-3, sa-east-1, us-east-1, us-east-2, us-west-1, us-west-2. The supported Azure Blob Storage " \
            "regions are: australiacentral, australiacentral2, australiaeast, australiasoutheast, brazilsouth, canadacentral, canadaeast, centralindia, " \
            "centralus, eastasia, eastus, eastus2, francecentral, francesouth, germanynorth, germanywestcentral, japaneast, japanwest, koreacentral, koreasouth, " \
            "northcentralus, northeurope, norwayeast, norwaywest, southafricanorth, southafricawest, southcentralus, southeastasia, southindia, switzerlandnorth, " \
            "switzerlandwest, uaecentral, uaenorth, uksouth, ukwest, westcentralus, westeurope, westindia, westus, westus2."
    ),
    tag: Optional[str] = typer.Option(
        None,
        "--tag",
        "-t",
        help="A string to tag the datablob. If not passed, then the tag **latest** will be assigned to the datablob.",
    ),
    quiet: bool = typer.Option(
        False,
        "--quiet",
        "-q",
        help="Output datablob uuid only.",
    ),
    debug: bool = typer.Option(
        False,
        "--debug",
        "-d",
        help="Set logger level to DEBUG and output everything.",
    ),
):
    """Create and return a datablob that encapsulates the data from an AWS S3 bucket."""
    # NOTE: the docstring above is rendered as the command's --help description.
    # `debug` is not read in this body; presumably the auth-token decorator
    # handles it - confirm in airt.cli.helper.

    # Imported inside the command body rather than at module level.
    from airt.client import DataBlob

    datablob = DataBlob.from_s3(
        uri=uri,
        access_key=access_key,
        secret_key=secret_key,
        cloud_provider=cloud_provider,
        region=region,
        tag=tag,
    )

    if not quiet:
        # Verbose mode: announce the uuid first, then show the progress bar.
        typer.echo(f"Pulling datablob uuid: {datablob.uuid}")
        datablob.progress_bar()
        return

    # Quiet mode: call wait() first, then print nothing but the uuid.
    datablob.wait()
    typer.echo(f"{datablob.uuid}")

In [None]:
#| include: false

assert_has_help(["from-s3"])

"Usage: root from-s3 [OPTIONS] URI\n\n  Create and return a datablob that encapsulates the data from an AWS S3 bucket.\n\nArguments:\n  URI  The AWS S3 bucket uri.  [required]\n\nOptions:\n  --access-key TEXT           Access key for the S3 bucket. If **None** (default\n                              value), then the value from **AWS_ACCESS_KEY_ID**\n                              environment variable is used.\n  --secret-key TEXT           Secret key for the S3 bucket. If **None** (default\n                              value), then the value from\n                              **AWS_SECRET_ACCESS_KEY** environment variable is\n                              used.\n  -cp, --cloud-provider TEXT  The destination cloud storage provider's name to\n                              store the datablob. Currently, the API only\n                              supports **aws** and **azure** as cloud storage\n                              providers. If **None** (default value), then\n                  

In [None]:
# Helper function to test multiple scenarios.


def assert_datablob(xs: List[str]):
    """Run a datablob-creating CLI command through the shared negative and positive checks.

    Args:
        xs: Arguments passed to ``runner.invoke`` (sub-command name followed by its options).

    Checks performed:
        1. Without the ``SERVICE_TOKEN`` environment variable the command exits
           with code 1 and prints the missing-token message.
        2. With a token set, the verbose run prints "Pulling datablob uuid:" and
           the ``-q`` run prints only a uuid (32 characters once hyphens are removed).
    """

    # Testing Negative scenario
    # Creating datablob without token

    # Temporarily clear any previously set token. The try/finally guarantees it
    # is put back even when one of the assertions below fails, so a failing test
    # cannot leave later cells without their token.
    saved_token = os.environ.pop(SERVICE_TOKEN, None)
    try:
        result = runner.invoke(app, xs)
        display(result.stdout)
        assert result.exit_code == 1
        assert f"KeyError: The environment variable '{SERVICE_TOKEN}' is not set.\n\nPlease run the command '{CLIENT_NAME} token'" in result.stdout
    finally:
        if saved_token is not None:
            os.environ[SERVICE_TOKEN] = saved_token

    # Testing Positive scenario
    # With and without quiet

    with set_airt_service_token_envvar():
        # Without quiet (verbose)
        result = runner.invoke(app, xs)
        display(result.stdout)
        assert "Pulling datablob uuid:" in result.stdout, result.stdout

        # With quiet
        display("*" * 120)
        result = runner.invoke(app, xs + ["-q"])
        display(result.stdout)
        # Drop the trailing newline; what remains must be a bare uuid.
        assert len(remove_hypens_from_id(result.stdout[:-1])) == 32, len(result.stdout[:-1])

In [None]:
# Tests for Datablob s3

cmd = ["from-s3", f"{TEST_S3_CSV_URI}"]

assert_datablob(cmd)

"KeyError: The environment variable 'AIRT_SERVICE_TOKEN' is not set.\n\nPlease run the command 'airt token' to get the application token and set it in the environment variable `AIRT_SERVICE_TOKEN`.\n\nTry 'airt token --help' for help.\n"

'Pulling datablob uuid: 52fe570b-ddbf-495b-93d1-97d6b65f276a\n\r  0%|          | 0/1 [00:00<?, ?it/s]\r  0%|          | 0/1 [00:05<?, ?it/s]\r  0%|          | 0/1 [00:10<?, ?it/s]\r  0%|          | 0/1 [00:15<?, ?it/s]\r  0%|          | 0/1 [00:20<?, ?it/s]\r  0%|          | 0/1 [00:25<?, ?it/s]\r  0%|          | 0/1 [00:30<?, ?it/s]\r100%|██████████| 1/1 [00:35<00:00,  5.05s/it]\r100%|██████████| 1/1 [00:35<00:00, 35.37s/it]\n'

'************************************************************************************************************************'

'5d47fdf7-ac39-48e2-84e1-ed3fb248a980\n'

In [None]:
#| exporti


@app.command("from-azure-blob-storage")
@helper.requires_auth_token
def from_azure_blob_storage(
    uri: str = typer.Argument(
        ..., help="Azure Blob Storage URI of the source file."
    ),
    credential: str = typer.Option(
        ...,
        "--credential",
        "-c",
        help="Credential to access the Azure Blob Storage.",
    ),
    cloud_provider: Optional[str] = typer.Option(
        None,
        "--cloud-provider",
        "-cp",
        help="The destination cloud storage provider's name to store the datablob. Currently, the API only supports **aws** and **azure** as cloud storage providers. If **None** (default value), then **azure**  will be used as the cloud storage provider.",
    ),
    region: Optional[str] = typer.Option(
        None,
        "--region",
        "-r",
        help="The destination cloud provider's region to save your datablob. If **None** (default value) then the default region will be assigned based on the cloud provider. " \
            "In the case of **aws**, **eu-west-1** will be used and in the case of **azure**, **westeurope** will be used. " \
            "The supported AWS regions are: ap-northeast-1, ap-northeast-2, ap-south-1, ap-southeast-1, ap-southeast-2, ca-central-1, eu-central-1, " \
            "eu-north-1, eu-west-1, eu-west-2, eu-west-3, sa-east-1, us-east-1, us-east-2, us-west-1, us-west-2. The supported Azure Blob Storage " \
            "regions are: australiacentral, australiacentral2, australiaeast, australiasoutheast, brazilsouth, canadacentral, canadaeast, centralindia, " \
            "centralus, eastasia, eastus, eastus2, francecentral, francesouth, germanynorth, germanywestcentral, japaneast, japanwest, koreacentral, koreasouth, " \
            "northcentralus, northeurope, norwayeast, norwaywest, southafricanorth, southafricawest, southcentralus, southeastasia, southindia, switzerlandnorth, " \
            "switzerlandwest, uaecentral, uaenorth, uksouth, ukwest, westcentralus, westeurope, westindia, westus, westus2."
    ),
    tag: Optional[str] = typer.Option(
        None,
        "--tag",
        "-t",
        help="A string to tag the datablob. If not passed, then the tag **latest** will be assigned to the datablob.",
    ),
    quiet: bool = typer.Option(
        False,
        "--quiet",
        "-q",
        help="Output datablob uuid only.",
    ),
    debug: bool = typer.Option(
        False,
        "--debug",
        "-d",
        help="Set logger level to DEBUG and output everything.",
    ),
):
    """Create and return a datablob that encapsulates the data from an Azure Blob Storage."""
    # NOTE: the docstring above is rendered as the command's --help description.
    # `debug` is not read in this body; presumably the auth-token decorator
    # handles it - confirm in airt.cli.helper.

    # Imported inside the command body rather than at module level.
    from airt.client import DataBlob

    datablob = DataBlob.from_azure_blob_storage(
        uri=uri,
        credential=credential,
        cloud_provider=cloud_provider,
        region=region,
        tag=tag,
    )

    if not quiet:
        # Verbose mode: announce the uuid first, then show the progress bar.
        typer.echo(f"Pulling datablob uuid: {datablob.uuid}")
        datablob.progress_bar()
        return

    # Quiet mode: call wait() first, then print nothing but the uuid.
    datablob.wait()
    typer.echo(f"{datablob.uuid}")

In [None]:
assert_has_help(["from-azure-blob-storage"])

"Usage: root from-azure-blob-storage [OPTIONS] URI\n\n  Create and return a datablob that encapsulates the data from an Azure Blob\n  Storage.\n\nArguments:\n  URI  Azure Blob Storage URI of the source file.  [required]\n\nOptions:\n  -c, --credential TEXT       Credential to access the Azure Blob Storage.\n                              [required]\n  -cp, --cloud-provider TEXT  The destination cloud storage provider's name to\n                              store the datablob. Currently, the API only\n                              supports **aws** and **azure** as cloud storage\n                              providers. If **None** (default value), then\n                              **azure**  will be used as the cloud storage\n                              provider.\n  -r, --region TEXT           The destination cloud provider's region to save\n                              your datablob. If **None** (default value) then\n                              the default region will be assigne

In [None]:
# Tests for from-azure-blob-storage
# Positive Scenario: Passing credential in arguments

storage_client = StorageManagementClient(
    DefaultAzureCredential(), os.environ["AZURE_SUBSCRIPTION_ID"]
)
keys = storage_client.storage_accounts.list_keys(
    "test-airt-service", "testairtservice"
)
credential = keys.keys[0].value

cmd = [
    "from-azure-blob-storage",
    f"{TEST_AZURE_URI}",
    "--credential",
    f"{credential}",
]

assert_datablob(cmd)


"KeyError: The environment variable 'AIRT_SERVICE_TOKEN' is not set.\n\nPlease run the command 'airt token' to get the application token and set it in the environment variable `AIRT_SERVICE_TOKEN`.\n\nTry 'airt token --help' for help.\n"

'Pulling datablob uuid: 2221ab31-82db-433e-9f97-81e494e54088\n\r  0%|          | 0/1 [00:00<?, ?it/s]\r  0%|          | 0/1 [00:05<?, ?it/s]\r  0%|          | 0/1 [00:10<?, ?it/s]\r100%|██████████| 1/1 [00:15<00:00,  5.05s/it]\r100%|██████████| 1/1 [00:15<00:00, 15.19s/it]\n'

'************************************************************************************************************************'

'106d0e83-2129-4df8-83ad-32490b9e4da0\n'

In [None]:
# Tests for from-azure-blob-storage
# Positive Scenario: Validating that the datablob is stored in the explicitly requested region
with set_airt_service_token_envvar():
    
    for region in ["westeurope", "northeurope"]:
        
        cmd = [
            "from-azure-blob-storage",
            f"{TEST_AZURE_URI}",
            "--credential",
            f"{credential}",
            "--cloud-provider",
            "azure",
            "--region",
            f"{region}",
            "-q"
        ]

        result = runner.invoke(app, cmd )
        display(result.stdout)

        db_uuid = result.stdout[:-1]
        display(db_uuid)
        assert len(remove_hypens_from_id(db_uuid)) == 32

        result = runner.invoke(app, ["details", db_uuid])
        display(result.stdout)
        assert result.exit_code == 0
        assert region in result.stdout

'91f38db9-7cea-4d5b-a72a-053ad044546b\n'

'91f38db9-7cea-4d5b-a72a-053ad044546b'

'datablob_uuid                         datasource_uuids    type                source                                                                                     region      cloud_provider    tags    pulled_on      folder_size    user_uuid                             error    disabled    ready\n91f38db9-7cea-4d5b-a72a-053ad044546b  <none>              azure_blob_storage  https://testairtservice.blob.core.windows.net/test-container/ecommerce_behavior_notebooks  westeurope  azure             latest  8 seconds ago  10.2 MB        06a385d1-66a1-4ffc-8306-7f5821902fcc  <none>   False       True\n'

'05f83868-b5f1-430d-b6f3-ecf81aa99476\n'

'05f83868-b5f1-430d-b6f3-ecf81aa99476'

'datablob_uuid                         datasource_uuids    type                source                                                                                     region       cloud_provider    tags    pulled_on      folder_size    user_uuid                             error    disabled    ready\n05f83868-b5f1-430d-b6f3-ecf81aa99476  <none>              azure_blob_storage  https://testairtservice.blob.core.windows.net/test-container/ecommerce_behavior_notebooks  northeurope  azure             latest  8 seconds ago  10.2 MB        06a385d1-66a1-4ffc-8306-7f5821902fcc  <none>   False       True\n'

In [None]:
#| exporti


@app.command("from-mysql")
@helper.requires_auth_token
def from_mysql(
    host: str = typer.Option(..., help="Remote database host name."),
    database: str = typer.Option(
        ..., help="Database name."
    ),
    table: str = typer.Option(..., help="Table name."),
    port: int = typer.Option(
        3306,
        help="Host port number. If not passed, then the default value **3306** will be used.",
    ),
    cloud_provider: Optional[str] = typer.Option(
        None,
        "--cloud-provider",
        "-cp",
        help="The destination cloud storage provider's name to store the datablob. Currently, the API only supports **aws** and **azure** as cloud storage providers. If **None** (default value), then **aws**  will be used as the cloud storage provider.",
    ),
    region: Optional[str] = typer.Option(
        None,
        "--region",
        "-r",
        help="The destination cloud provider's region to save your datablob. If **None** (default value) then the default region will be assigned based on the cloud provider. " \
            "In the case of **aws**, **eu-west-1** will be used and in the case of **azure**, **westeurope** will be used. " \
            "The supported AWS regions are: ap-northeast-1, ap-northeast-2, ap-south-1, ap-southeast-1, ap-southeast-2, ca-central-1, eu-central-1, " \
            "eu-north-1, eu-west-1, eu-west-2, eu-west-3, sa-east-1, us-east-1, us-east-2, us-west-1, us-west-2. The supported Azure Blob Storage " \
            "regions are: australiacentral, australiacentral2, australiaeast, australiasoutheast, brazilsouth, canadacentral, canadaeast, centralindia, " \
            "centralus, eastasia, eastus, eastus2, francecentral, francesouth, germanynorth, germanywestcentral, japaneast, japanwest, koreacentral, koreasouth, " \
            "northcentralus, northeurope, norwayeast, norwaywest, southafricanorth, southafricawest, southcentralus, southeastasia, southindia, switzerlandnorth, " \
            "switzerlandwest, uaecentral, uaenorth, uksouth, ukwest, westcentralus, westeurope, westindia, westus, westus2."
    ),
    username: Optional[str] = typer.Option(
        None,
        "--username",
        "-u",
        help=f'Database username. If not passed, the default value "root" will be used unless the value is explicitly set in the environment variable **{CLIENT_DB_USERNAME}**.'
    ),
    password: Optional[str] = typer.Option(
        None,
        "--password",
        "-p",
        help=f'Database password. If not passed, the default value "" will be used unless the value is explicitly set in the environment variable **{CLIENT_DB_PASSWORD}**.'
    ),
    tag: Optional[str] = typer.Option(
        None,
        "--tag",
        "-t",
        help="A string to tag the datablob. If not passed, then the tag **latest** will be assigned to the datablob.",
    ),
    quiet: bool = typer.Option(
        False,
        "--quiet",
        "-q",
        help="Output datablob uuid only.",
    ),
    debug: bool = typer.Option(
        False,
        "--debug",
        "-d",
        help="Set logger level to DEBUG and output everything.",
    ),
):
    """Create and return a datablob that encapsulates the data from a mysql database.
    
    If the database requires authentication, pass the username/password as commandline arguments or store it in
    the **AIRT_CLIENT_DB_USERNAME** and **AIRT_CLIENT_DB_PASSWORD** environment variables.
    """
    # NOTE: the docstring above is rendered as the command's --help description.
    # `debug` is not read in this body; presumably the auth-token decorator
    # handles it - confirm in airt.cli.helper.

    # Imported inside the command body rather than at module level.
    from airt.client import DataBlob

    # Every connection and storage option is forwarded unchanged to the client.
    pull_options = dict(
        host=host,
        database=database,
        port=port,
        table=table,
        username=username,
        password=password,
        cloud_provider=cloud_provider,
        region=region,
        tag=tag,
    )
    datablob = DataBlob.from_mysql(**pull_options)

    if not quiet:
        # Verbose mode: announce the uuid first, then show the progress bar.
        typer.echo(f"Pulling datablob uuid: {datablob.uuid}")
        datablob.progress_bar()
        return

    # Quiet mode: call wait() first, then print nothing but the uuid.
    datablob.wait()
    typer.echo(f"{datablob.uuid}")

In [None]:
assert_has_help(["from-mysql"])

'Usage: root from-mysql [OPTIONS]\n\n  Create and return a datablob that encapsulates the data from a mysql database.\n\n  If the database requires authentication, pass the username/password as\n  commandline arguments or store it in the **AIRT_CLIENT_DB_USERNAME** and\n  **AIRT_CLIENT_DB_PASSWORD** environment variables.\n\nOptions:\n  --host TEXT                 Remote database host name.  [required]\n  --database TEXT             Database name.  [required]\n  --table TEXT                Table name.  [required]\n  --port INTEGER              Host port number. If not passed, then the default\n                              value **3306** will be used.  [default: 3306]\n  -cp, --cloud-provider TEXT  The destination cloud storage provider\'s name to\n                              store the datablob. Currently, the API only\n                              supports **aws** and **azure** as cloud storage\n                              providers. If **None** (default value), then\n           

In [None]:
# tests for db. Testing negative scenario.
# Passing invalid host address

cmd = [
    "from-mysql",
    "--host",
    "db.staging.airt.ai",
    "--database",
    "test",
    "--table",
    "test",
]


with set_airt_service_token_envvar():
    # Without quiet (verbose)
    result = runner.invoke(app, cmd)
    display(result.stdout)
    assert "Unknown MySQL server host 'db.staging.airt.ai'" in result.stdout

'Pulling datablob uuid: 3c5a1784-d9cc-4098-a214-11ef8adf7769\n\r  0%|          | 0/1 [00:00<?, ?it/s]\r  0%|          | 0/1 [00:05<?, ?it/s]\nError: (MySQLdb.OperationalError) (2005, "Unknown MySQL server host \'db.staging.airt.ai\' (-2)")\n(Background on this error at: https://sqlalche.me/e/14/e3q8)\n'

In [None]:
# tests for db. Testing positive scenario.

# Helper function to create new table in the mysql db

def get_db_engine():
    """Create a database engine from the ``DB_*`` environment variables.

    Reads ``DB_USERNAME``, ``DB_PASSWORD``, ``DB_HOST``, ``DB_PORT``,
    ``DB_DATABASE`` and ``DB_DATABASE_SERVER`` and assembles them into a
    ``<server>://<user>:<password>@<host>:<port>/<database>`` connection URL
    that is handed to ``create_engine``.

    Returns:
        The engine object returned by ``create_engine``.

    Raises:
        KeyError: If any of the environment variables is missing.
        ValueError: If ``DB_PORT`` is not an integer.
    """
    username = os.environ["DB_USERNAME"]
    password = os.environ["DB_PASSWORD"]
    host = os.environ["DB_HOST"]
    port = int(os.environ["DB_PORT"])
    database = os.environ["DB_DATABASE"]
    database_server = os.environ["DB_DATABASE_SERVER"]

    # Both credentials are URL-quoted: characters such as "@", ":" or "/" in
    # either one would otherwise be parsed as URL delimiters.
    quoted_username = urlquote(username)
    quoted_password = urlquote(password)
    conn_str = f"{database_server}://{quoted_username}:{quoted_password}@{host}:{port}/{database}"

    return create_engine(conn_str)

with tempfile.TemporaryDirectory(prefix="test_s3_download_") as d:
    !aws s3 sync {TEST_S3_URI} {d}
    !ls {d}
    
    engine = get_db_engine()
    
    df = pd.read_parquet(d)
    try:
        df.to_sql("test_db_pull", con=engine, if_exists="fail")
    except ValueError as e:
        display(e)


with set_airt_service_token_envvar():
    
    for region in ["eu-west-1", "eu-west-3"]:
        
        # Creating a new datasource
        cmd = [
            "from-mysql", 
            "--host", os.environ["DB_HOST"],
            "--database", os.environ["DB_DATABASE"],
            "--table", "test_db_pull",
            "--username", os.environ["DB_USERNAME"],
            "--password", os.environ["DB_PASSWORD"],
            "--tag", "v1.1.0"
        ]

        cmd_q = [
            "from-mysql", 
            "--host", os.environ["DB_HOST"],
            "--database", os.environ["DB_DATABASE"],
            "--table", "test_db_pull",
            "--username", os.environ["DB_USERNAME"],
            "--password", os.environ["DB_PASSWORD"],
            "-cp", "aws",
            "--region", region,
            "-q",
        ]
    
        # Without quiet
        result = runner.invoke(app, cmd)

        display(result.stdout)
        assert "Pulling datablob uuid:" in str(result.stdout)

        # With quiet
        result = runner.invoke(app, cmd_q)
        db_uuid = result.stdout[:-1]
        display(db_uuid)
        assert len(remove_hypens_from_id(db_uuid)) == 32

        result = runner.invoke(app, ["details", db_uuid])
        display(result.stdout)
        assert result.exit_code == 0
        assert region in result.stdout

download: s3://test-airt-service/ecommerce_behavior_notebooks/_common_metadata to ../../../tmp/test_s3_download_ycsu_oym/_common_metadata
download: s3://test-airt-service/ecommerce_behavior_notebooks/_metadata to ../../../tmp/test_s3_download_ycsu_oym/_metadata
download: s3://test-airt-service/ecommerce_behavior_notebooks/part.11.parquet to ../../../tmp/test_s3_download_ycsu_oym/part.11.parquet
download: s3://test-airt-service/ecommerce_behavior_notebooks/part.0.parquet to ../../../tmp/test_s3_download_ycsu_oym/part.0.parquet
download: s3://test-airt-service/ecommerce_behavior_notebooks/part.15.parquet to ../../../tmp/test_s3_download_ycsu_oym/part.15.parquet
download: s3://test-airt-service/ecommerce_behavior_notebooks/part.1.parquet to ../../../tmp/test_s3_download_ycsu_oym/part.1.parquet
download: s3://test-airt-service/ecommerce_behavior_notebooks/part.13.parquet to ../../../tmp/test_s3_download_ycsu_oym/part.13.parquet
download: s3://test-airt-service/ecommerce_behavior_notebooks/

ValueError("Table 'test_db_pull' already exists.")

'Pulling datablob uuid: 21660a1e-1b90-477a-a9c1-493db7def964\n\r  0%|          | 0/1 [00:00<?, ?it/s]\r  0%|          | 0/1 [00:05<?, ?it/s]\r100%|██████████| 1/1 [00:10<00:00,  5.06s/it]\r100%|██████████| 1/1 [00:10<00:00, 10.15s/it]\n'

'518fd551-fd62-43ad-bb9c-22db6660d0de'

'datablob_uuid                         datasource_uuids    type    source                                               region     cloud_provider    tags    pulled_on      folder_size    user_uuid                             error    disabled    ready\n518fd551-fd62-43ad-bb9c-22db6660d0de  <none>              db      mysql://harish-mysql:3306/airt_service/test_db_pull  eu-west-1  aws               latest  2 seconds ago  8.0 MB         06a385d1-66a1-4ffc-8306-7f5821902fcc  <none>   False       True\n'

'Pulling datablob uuid: b1589391-1c13-4fa2-800f-fd2d73012d1a\n\r  0%|          | 0/1 [00:00<?, ?it/s]\r  0%|          | 0/1 [00:05<?, ?it/s]\r100%|██████████| 1/1 [00:10<00:00,  5.05s/it]\r100%|██████████| 1/1 [00:10<00:00, 10.14s/it]\n'

'4d4b37f3-01ff-4f90-9e31-a2432449d93d'

'datablob_uuid                         datasource_uuids    type    source                                               region     cloud_provider    tags    pulled_on      folder_size    user_uuid                             error    disabled    ready\n4d4b37f3-01ff-4f90-9e31-a2432449d93d  <none>              db      mysql://harish-mysql:3306/airt_service/test_db_pull  eu-west-3  aws               latest  2 seconds ago  8.0 MB         06a385d1-66a1-4ffc-8306-7f5821902fcc  <none>   False       True\n'

In [None]:
#| exporti


@app.command("to-datasource")
@helper.requires_auth_token
def to_datasource(
    uuid: str = typer.Option(
        ...,
        help="Datablob uuid.",
    ),
    file_type: str = typer.Option(
        ...,
        help='The file type of the datablob. Currently, the API only supports "csv" and "parquet" as file types.',
    ),
    index_column: str = typer.Option(
        ...,
        help="The column to use as index (row labels).",
    ),
    sort_by: str = typer.Option(
        ...,
        help="The column(s) to sort the data. Can either be a string or a JSON encoded list of strings.",
    ),
    deduplicate_data: bool = typer.Option(
        False,
        help="If set to **True** (default value **False**), the datasource will be created with duplicate rows removed.",
    ),
    blocksize: str = typer.Option(
        "256MB",
        help="The number of bytes used to split larger files. If None, then the default value **256MB** will be used.",
    ),
    kwargs_json: Optional[str] = typer.Option(
        None,
        help="Additional JSON encoded dict arguments to use while processing the data. "
        "E.g.: to skip 100 lines from the bottom of the file, pass '{\"skipfooter\": 100}'",
    ),
    quiet: bool = typer.Option(
        False,
        "--quiet",
        "-q",
        help="Output datasource uuid only.",
    ),
    debug: bool = typer.Option(
        False,
        "--debug",
        "-d",
        help="Set logger level to DEBUG and output everything.",
    ),
) -> None:
    """Process the datablob and return a datasource object."""
    # NOTE: the docstring above is rendered as the command's --help description,
    # so implementation notes are kept in comments instead.
    # `debug` is not read in this body.
    # NOTE(review): presumably it is consumed by the decorator - confirm in airt.cli.helper.

    from airt.client import DataBlob

    # Extra keyword arguments are forwarded verbatim to DataBlob.to_datasource,
    # so the decoded JSON has to be a dict (a JSON object).
    kwargs = json.loads(kwargs_json) if kwargs_json else {}
    if not isinstance(kwargs, dict):
        raise ValueError(
            f"--kwargs-json must be a JSON encoded dict, got: {kwargs_json!r}"
        )

    # `--sort-by` is either a plain column name or a JSON encoded list of column
    # names. Only adopt the decoded value when it is a list or a string; otherwise
    # a column literally named e.g. "2021", "true" or "null" would silently be
    # turned into an int / bool / None.
    sort_columns: Union[str, List[str]] = sort_by
    try:
        decoded_sort_by = json.loads(sort_by)
    except json.JSONDecodeError:
        # Not JSON at all: treat the raw text as a single column name.
        decoded_sort_by = None
    if isinstance(decoded_sort_by, (list, str)):
        sort_columns = decoded_sort_by

    db = DataBlob(uuid=uuid)
    ds = db.to_datasource(
        file_type=file_type,
        index_column=index_column,
        sort_by=sort_columns,
        deduplicate_data=deduplicate_data,
        blocksize=blocksize,
        **kwargs,
    )

    if quiet:
        # Quiet mode prints nothing but the uuid.
        # NOTE(review): ds.wait() presumably blocks until processing is done - confirm.
        ds.wait()
        typer.echo(f"{ds.uuid}")
    else:
        typer.echo(f"Processing and pulling the datasource uuid: {ds.uuid}")

        ds.progress_bar()

In [None]:
# Help check for the "to-datasource" command. `assert_has_help` is defined elsewhere in
# this notebook; the cell output shows the rendered usage text.
assert_has_help(["to-datasource"])

'Usage: root to-datasource [OPTIONS]\n\n  Process the datablob and return a datasource object.\n\nOptions:\n  --uuid TEXT                     Datablob uuid.  [required]\n  --file-type TEXT                The file type of the datablob. Currently, the\n                                  API only supports "csv" and "parquet" as file\n                                  types.  [required]\n  --index-column TEXT             The column to use as index (row labels).\n                                  [required]\n  --sort-by TEXT                  The column(s) to sort the data. Can either be\n                                  a string or a JSON encoded list of strings.\n                                  [required]\n  --deduplicate-data / --no-deduplicate-data\n                                  If set to **True** (default value **False**),\n                                  the datasource will be created with duplicate\n                                  rows removed.  [default: no-deduplicate-data

In [None]:
# Tests for to-datasource

with set_airt_service_token_envvar(), generate_db() as db:
    # Argument pieces shared by both invocations; only --sort-by and -q vary.
    leading_args = [
        "to-datasource",
        "--uuid", f"{db.uuid}",
        "--file-type", "parquet",
        "--index-column", "user_id",
    ]
    trailing_args = ["--kwargs-json", '{"parse_dates": ["event_time"], "skipfooter": 100}']

    # Verbose run, sorting by a single plain column name
    cmd = [*leading_args, "--sort-by", "event_time", *trailing_args]
    result = runner.invoke(app, cmd)

    display(result.stdout)
    assert result.exit_code == 0, f"{result.stdout=}, {result.exit_code=}"
    assert "Processing and pulling the datasource uuid:" in result.stdout, result.stdout

    # Quiet run, sorting by a JSON encoded list of column names
    cmd = [*leading_args, "--sort-by", '["event_time", "category_id"]', *trailing_args, "-q"]
    result = runner.invoke(app, cmd)

    display(result.stdout)
    assert result.exit_code == 0, f"{result.stdout=}, {result.exit_code=}"
    # Only the uuid is printed: 32 characters remain once hyphens and newlines are dropped
    printed_uuid = result.stdout[:-1].replace('-', '').replace('\n', '')
    assert len(printed_uuid) == 32

'Processing and pulling the datasource uuid: 8b5b59ae-7dd5-4ff0-b35b-bc040768ef6f\n\r  0%|          | 0/1 [00:00<?, ?it/s]\r  0%|          | 0/1 [00:05<?, ?it/s]\r  0%|          | 0/1 [00:10<?, ?it/s]\r  0%|          | 0/1 [00:15<?, ?it/s]\r  0%|          | 0/1 [00:20<?, ?it/s]\r  0%|          | 0/1 [00:25<?, ?it/s]\r100%|██████████| 1/1 [00:30<00:00,  5.05s/it]\r100%|██████████| 1/1 [00:30<00:00, 30.36s/it]\n'

'3978f659-a943-404a-b246-c95670c2e060\n'

In [None]:
# Tests for to-datasource. Passing wrong index and sort column names

with set_airt_service_token_envvar(), generate_db() as db:
    # "random-col" is not a column of the test data, so the error output must name it
    cmd = [
        "to-datasource",
        "--uuid", f"{db.uuid}",
        "--file-type", "parquet",
        "--index-column", "random-col",
        "--sort-by", "random-col",
        "-q",
    ]
    result = runner.invoke(app, cmd)

    display(result.stdout)
    assert "'random-col'" in result.stdout, result.stdout

'Error: "Data has no column \'random-col\': use any column of [\'event_time\', \'event_type\', \'product_id\', \'category_id\', \'category_code\', \'brand\', \'price\', \'user_id\', \'user_session\']"\n'

In [None]:
#| exporti


@app.command()
@helper.display_formated_table
@helper.requires_auth_token
def ls(
    offset: int = typer.Option(
        0,
        "--offset",
        "-o",
        help="The number of datablobs to offset at the beginning. If **None**, then the default value **0** will be used.",
    ),
    limit: int = typer.Option(
        100,
        "--limit",
        "-l",
        help="The maximum number of datablobs to return from the server. If **None**, then the default value **100** will be used.",
    ),
    disabled: bool = typer.Option(
        False,
        "--disabled",
        # Adjacent string literals are joined verbatim, so every fragment but the
        # last ends with a space (otherwise --help shows "returned.Else", "listof").
        help="If set to **True**, then only the deleted datablobs will be returned. "
        "Else, the default value **False** will be used to return only the list "
        "of active datablobs.",
    ),
    completed: bool = typer.Option(
        False,
        "--completed",
        help="If set to **True**, then only the datablobs that are successfully downloaded "
        "to the server will be returned. Else, the default value **False** will be used to "
        "return all the datablobs.",
    ),
    format: Optional[str] = typer.Option(
        None,
        "--format",
        "-f",
        help="Format output and show only the given column(s) values."
    ),
    quiet: bool = typer.Option(
        False,
        "--quiet",
        "-q",
        help="Output only datablob uuids separated by space",
    ),
    debug: bool = typer.Option(
        False,
        "--debug",
        "-d",
        help="Set logger level to DEBUG and output everything.",
    ),
) -> Dict["str", Union[pd.DataFrame, str]]:
    """Return the list of datablobs."""
    # NOTE: the docstring above is rendered as the command's --help description.
    # `format`, `quiet` and `debug` are not read in this body.
    # NOTE(review): presumably they are consumed by the decorators - confirm in airt.cli.helper.

    from airt.client import DataBlob

    dbx = DataBlob.ls(offset=offset, limit=limit, disabled=disabled, completed=completed)

    df = DataBlob.as_df(dbx)

    # Swap the raw values for human-readable ones before the table is displayed
    df['pulled_on'] = helper.humanize_date(df['pulled_on'])
    df['folder_size'] = helper.humanize_size(df['folder_size'])

    # NOTE(review): the key is spelled "quite_column_name" (sic). Presumably
    # helper.display_formated_table looks it up by this exact name, so the
    # spelling is kept as is.
    return {"df": df, "quite_column_name": "datablob_uuid"}

In [None]:
# Help check for the "ls" command. `assert_has_help` is defined elsewhere in this
# notebook; the cell output shows the rendered usage text.
assert_has_help(["ls"])

'Usage: root ls [OPTIONS]\n\n  Return the list of datablobs.\n\nOptions:\n  -o, --offset INTEGER  The number of datablobs to offset at the beginning. If\n                        **None**, then the default value **0** will be used.\n                        [default: 0]\n  -l, --limit INTEGER   The maximum number of datablobs to return from the\n                        server. If **None**, then the default value **100** will\n                        be used.  [default: 100]\n  --disabled            If set to **True**, then only the deleted datablobs will\n                        be returned.Else, the default value **False** will be\n                        used to return only the listof active datablobs.\n  --completed           If set to **True**, then only the datablobs that are\n                        successfully downloadedto the server will be returned.\n                        Else, the default value **False** will be used toreturn\n                        all the datablobs.\n  -f

In [None]:
# Tests for datablob ls
# Testing positive scenario. Saving the token in env variable

def get_uuids_from_result(result) -> List[str]:
    """Split the captured stdout of a CLI invocation into its non-empty lines.

    Args:
        result: Any object with a ``stdout`` string attribute, e.g. the value
            returned by ``runner.invoke``.

    Returns:
        One string per non-empty output line, in output order. Empty output
        gives an empty list, so ``len()`` of the result can be compared against
        a limit without an off-by-one.
    """
    # Filtering blank lines handles output with or without a trailing newline,
    # without chopping the last character.
    return [line for line in result.stdout.split("\n") if line]


# Exercise `ls` in both output modes with the service token set through the context manager.
with set_airt_service_token_envvar():

    # Without quiet
    # --format limits the table to the listed columns (here: datablob_uuid and type)
    format_str = "{'datablob_uuid': '{}', 'type': '{}'}"
    result = runner.invoke(app, ["ls", "--format", format_str])
    display(result.stdout)

    # The "type" column header must be part of the formatted table
    assert "type" in result.stdout
    assert result.exit_code == 0

    # With quiet
    # Only the uuids are printed, one per line
    result = runner.invoke(app, ["ls", "-q"])
    display(result.stdout)

    assert result.exit_code == 0
    uuids = get_uuids_from_result(result)
    display(f"{uuids=}")

'datablob_uuid                         type\ne6493ad2-bf39-4edb-b08e-0da06c148635  azure_blob_storage\n1966d500-0b0c-49ff-bd6e-fc3a99956910  s3\nc89b15b1-8e63-40f8-8824-4c3d5c857abc  azure_blob_storage\n8cdbc837-723d-48da-b3ac-9b0e8ad6435c  db\n76cb6e5c-4c9b-4b9a-8e93-7415f022a0b1  db\ned044904-4647-4001-896d-7b56bf047616  local\n406fb790-f5d9-4d86-8c11-a31ea91b1249  s3\nb78ae923-f6d5-49f4-bcbd-1492fdb8c00b  s3\n8efd5d95-a01c-49f6-8c63-d8fefd3dea22  s3\nc75e0516-6cbf-4356-b4b0-fdb9a393ecb8  azure_blob_storage\n1b4dfe61-781f-4267-a8b4-3956826bb351  azure_blob_storage\n75d00af3-19a1-4731-ac2b-0e7d11526aa2  azure_blob_storage\n9ee91442-b110-4af2-b2f1-b5445b6208e3  azure_blob_storage\nd8a17bcf-659b-4f51-95cc-ffee2fe191f9  db\n13c0b5d7-2d64-483e-9ee5-fb8cf476897b  db\n726efd1b-e79f-4c82-8c32-f37e25bde427  db\n7933283e-2232-4dda-a111-bb1e0b7551e1  db\n05a54859-1192-4bb4-8db6-6a88d2aa0043  db\n52fbdd0c-1726-4cfb-8317-fb4b1559643d  db\nd35c6ea5-3ad3-4979-9b19-2bda297be074  local\nc76d6759-edf4

'e6493ad2-bf39-4edb-b08e-0da06c148635\n1966d500-0b0c-49ff-bd6e-fc3a99956910\nc89b15b1-8e63-40f8-8824-4c3d5c857abc\n8cdbc837-723d-48da-b3ac-9b0e8ad6435c\n76cb6e5c-4c9b-4b9a-8e93-7415f022a0b1\ned044904-4647-4001-896d-7b56bf047616\n406fb790-f5d9-4d86-8c11-a31ea91b1249\nb78ae923-f6d5-49f4-bcbd-1492fdb8c00b\n8efd5d95-a01c-49f6-8c63-d8fefd3dea22\nc75e0516-6cbf-4356-b4b0-fdb9a393ecb8\n1b4dfe61-781f-4267-a8b4-3956826bb351\n75d00af3-19a1-4731-ac2b-0e7d11526aa2\n9ee91442-b110-4af2-b2f1-b5445b6208e3\nd8a17bcf-659b-4f51-95cc-ffee2fe191f9\n13c0b5d7-2d64-483e-9ee5-fb8cf476897b\n726efd1b-e79f-4c82-8c32-f37e25bde427\n7933283e-2232-4dda-a111-bb1e0b7551e1\n05a54859-1192-4bb4-8db6-6a88d2aa0043\n52fbdd0c-1726-4cfb-8317-fb4b1559643d\nd35c6ea5-3ad3-4979-9b19-2bda297be074\nc76d6759-edf4-4eaa-bc64-0145471f7272\n3cc0d538-c4f5-4cab-be69-d732262e4046\n2c3cccb6-09a4-4bfa-b278-525d65b68721\nbf2b7764-7b0e-443d-aa8d-154994108370\n9c2fc83a-2a26-4eaa-9f9f-4f71be50816d\nc507a683-864b-4fff-8c0a-1dd6c4e695f3\n7aa4d380-cf

"uuids=['e6493ad2-bf39-4edb-b08e-0da06c148635', '1966d500-0b0c-49ff-bd6e-fc3a99956910', 'c89b15b1-8e63-40f8-8824-4c3d5c857abc', '8cdbc837-723d-48da-b3ac-9b0e8ad6435c', '76cb6e5c-4c9b-4b9a-8e93-7415f022a0b1', 'ed044904-4647-4001-896d-7b56bf047616', '406fb790-f5d9-4d86-8c11-a31ea91b1249', 'b78ae923-f6d5-49f4-bcbd-1492fdb8c00b', '8efd5d95-a01c-49f6-8c63-d8fefd3dea22', 'c75e0516-6cbf-4356-b4b0-fdb9a393ecb8', '1b4dfe61-781f-4267-a8b4-3956826bb351', '75d00af3-19a1-4731-ac2b-0e7d11526aa2', '9ee91442-b110-4af2-b2f1-b5445b6208e3', 'd8a17bcf-659b-4f51-95cc-ffee2fe191f9', '13c0b5d7-2d64-483e-9ee5-fb8cf476897b', '726efd1b-e79f-4c82-8c32-f37e25bde427', '7933283e-2232-4dda-a111-bb1e0b7551e1', '05a54859-1192-4bb4-8db6-6a88d2aa0043', '52fbdd0c-1726-4cfb-8317-fb4b1559643d', 'd35c6ea5-3ad3-4979-9b19-2bda297be074', 'c76d6759-edf4-4eaa-bc64-0145471f7272', '3cc0d538-c4f5-4cab-be69-d732262e4046', '2c3cccb6-09a4-4bfa-b278-525d65b68721', 'bf2b7764-7b0e-443d-aa8d-154994108370', '9c2fc83a-2a26-4eaa-9f9f-4f71be5

In [None]:
# Tests for datablob ls
# Testing positive scenario.
# Testing by passing different values for limit


with set_airt_service_token_envvar():

    offset = 1
    for limit in [1, 10, 1000]:
        # CliRunner.invoke takes command-line arguments as strings (like a real argv),
        # so the numeric values are converted explicitly.
        result = runner.invoke(
            app, ["ls", "--offset", str(offset), "--limit", str(limit), "-q"]
        )

        assert result.exit_code == 0

        uuids = get_uuids_from_result(result)
        display(f"{uuids=}")
        # The server may hold fewer datablobs than requested, but never returns more
        assert limit >= len(uuids) >= 0

"uuids=['1966d500-0b0c-49ff-bd6e-fc3a99956910']"

"uuids=['1966d500-0b0c-49ff-bd6e-fc3a99956910', 'c89b15b1-8e63-40f8-8824-4c3d5c857abc', '8cdbc837-723d-48da-b3ac-9b0e8ad6435c', '76cb6e5c-4c9b-4b9a-8e93-7415f022a0b1', 'ed044904-4647-4001-896d-7b56bf047616', '406fb790-f5d9-4d86-8c11-a31ea91b1249', 'b78ae923-f6d5-49f4-bcbd-1492fdb8c00b', '8efd5d95-a01c-49f6-8c63-d8fefd3dea22', 'c75e0516-6cbf-4356-b4b0-fdb9a393ecb8', '1b4dfe61-781f-4267-a8b4-3956826bb351']"

"uuids=['1966d500-0b0c-49ff-bd6e-fc3a99956910', 'c89b15b1-8e63-40f8-8824-4c3d5c857abc', '8cdbc837-723d-48da-b3ac-9b0e8ad6435c', '76cb6e5c-4c9b-4b9a-8e93-7415f022a0b1', 'ed044904-4647-4001-896d-7b56bf047616', '406fb790-f5d9-4d86-8c11-a31ea91b1249', 'b78ae923-f6d5-49f4-bcbd-1492fdb8c00b', '8efd5d95-a01c-49f6-8c63-d8fefd3dea22', 'c75e0516-6cbf-4356-b4b0-fdb9a393ecb8', '1b4dfe61-781f-4267-a8b4-3956826bb351', '75d00af3-19a1-4731-ac2b-0e7d11526aa2', '9ee91442-b110-4af2-b2f1-b5445b6208e3', 'd8a17bcf-659b-4f51-95cc-ffee2fe191f9', '13c0b5d7-2d64-483e-9ee5-fb8cf476897b', '726efd1b-e79f-4c82-8c32-f37e25bde427', '7933283e-2232-4dda-a111-bb1e0b7551e1', '05a54859-1192-4bb4-8db6-6a88d2aa0043', '52fbdd0c-1726-4cfb-8317-fb4b1559643d', 'd35c6ea5-3ad3-4979-9b19-2bda297be074', 'c76d6759-edf4-4eaa-bc64-0145471f7272', '3cc0d538-c4f5-4cab-be69-d732262e4046', '2c3cccb6-09a4-4bfa-b278-525d65b68721', 'bf2b7764-7b0e-443d-aa8d-154994108370', '9c2fc83a-2a26-4eaa-9f9f-4f71be50816d', 'c507a683-864b-4fff-8c0a-1dd6c4e

In [None]:
# Tests for datablob ls
# Testing positive scenario.
# Testing by passing large value for offset.

with set_airt_service_token_envvar():

    limit = 10
    offset = 1_000_000
    # CliRunner.invoke takes command-line arguments as strings (like a real argv),
    # so the numeric values are converted explicitly.
    result = runner.invoke(app, ["ls", "--offset", str(offset), "--limit", str(limit)])

    # An offset past the last datablob is not an error: only the table header is printed
    assert result.exit_code == 0

    display(result.stdout)

'datablob_uuid    datasource_uuids    type    source    region    cloud_provider    tags    pulled_on    folder_size    ready\n'

In [None]:
#| exporti


@app.command()
@helper.display_formated_table
@helper.requires_auth_token
def rm(
    uuid: str = typer.Argument(..., help="Datablob uuid."),
    format: Optional[str] = typer.Option(
        None, "--format", "-f", help="Format output and show only the given column(s) values."
    ),
    quiet: bool = typer.Option(
        False, "--quiet", "-q", help="Output the deleted datablob uuid only."
    ),
    debug: bool = typer.Option(
        False, "--debug", "-d", help="Set logger level to DEBUG and output everything."
    ),
) -> Dict["str", Union[pd.DataFrame, str]]:
    """Delete a datablob from the server."""
    # NOTE: the docstring above is rendered as the command's --help description.
    # `format`, `quiet` and `debug` are not read in this body.
    # NOTE(review): presumably they are consumed by the decorators - confirm in airt.cli.helper.

    from airt.client import DataBlob

    deleted_df = DataBlob(uuid=uuid).delete()

    # Swap the raw values for human-readable ones before the table is displayed
    humanizers = (
        ("pulled_on", helper.humanize_date),
        ("folder_size", helper.humanize_size),
    )
    for column, humanize in humanizers:
        deleted_df[column] = humanize(deleted_df[column])

    # The "quite_column_name" key (sic) is returned exactly as spelled here.
    return {"df": deleted_df, "quite_column_name": "datablob_uuid"}

In [None]:
# Help check for the "rm" command. `assert_has_help` is defined elsewhere in this
# notebook; the cell output shows the rendered usage text.
assert_has_help(["rm"])

'Usage: root rm [OPTIONS] UUID\n\n  Delete a datablob from the server.\n\nArguments:\n  UUID  Datablob uuid.  [required]\n\nOptions:\n  -f, --format TEXT  Format output and show only the given column(s) values.\n  -q, --quiet        Output the deleted datablob uuid only.\n  -d, --debug        Set logger level to DEBUG and output everything.\n  --help             Show this message and exit.\n'

In [None]:
# Tests for datablob rm
# Testing positive scenario with quiet, followed by the negative scenarios on the same uuid.
# The steps depend on each other (delete -> list -> delete again), so their order matters.

with set_airt_service_token_envvar():
    with generate_db() as db:
        db_uuid = db.uuid

        # Deleting the created datablob from the server
        result = runner.invoke(app, ["rm", db_uuid, "-q"])
        # Quiet mode prints just the uuid followed by a newline; strip that newline
        deleted_uuid = result.stdout[:-1]

        display(deleted_uuid)

        assert result.exit_code == 0
        assert deleted_uuid == db_uuid

        # List the active datablob uuids in the server and make sure the deleted uuid is not among them
        format_str = "{'datablob_uuid': '{}'}"
        ls_result = runner.invoke(app, ["ls", "--format", format_str])
        ls_uuids = get_uuids_from_result(ls_result)

        display(ls_uuids)
        assert deleted_uuid not in ls_uuids
        
        # ls with quiet and disabled = True: the deleted uuid must now show up here
        result = runner.invoke(app, ["ls", "--disabled", "-q"])

        display(result.stdout)
        assert result.exit_code == 0

        disabled_db_uuids = get_uuids_from_result(result)

        display(f"{disabled_db_uuids=}")
        assert deleted_uuid in disabled_db_uuids

        # Testing negative scenario. Deleting an already deleted datablob
        result = runner.invoke(app, ["rm", deleted_uuid, "-q"])
        display(result.stdout)
        assert result.exit_code == 1

        # Testing negative scenario. Getting the details of the deleted datablob
        result = runner.invoke(app, ["details", deleted_uuid])
        display(result.stdout)
        assert result.exit_code == 1

'5f4579a6-4fab-4f5e-9363-41abea96dd2e'

['e6493ad2-bf39-4edb-b08e-0da06c148635',
 '1966d500-0b0c-49ff-bd6e-fc3a99956910',
 'c89b15b1-8e63-40f8-8824-4c3d5c857abc',
 '8cdbc837-723d-48da-b3ac-9b0e8ad6435c',
 '76cb6e5c-4c9b-4b9a-8e93-7415f022a0b1',
 'ed044904-4647-4001-896d-7b56bf047616',
 '406fb790-f5d9-4d86-8c11-a31ea91b1249',
 'b78ae923-f6d5-49f4-bcbd-1492fdb8c00b',
 '8efd5d95-a01c-49f6-8c63-d8fefd3dea22',
 'c75e0516-6cbf-4356-b4b0-fdb9a393ecb8',
 '1b4dfe61-781f-4267-a8b4-3956826bb351',
 '75d00af3-19a1-4731-ac2b-0e7d11526aa2',
 '9ee91442-b110-4af2-b2f1-b5445b6208e3',
 'd8a17bcf-659b-4f51-95cc-ffee2fe191f9',
 '13c0b5d7-2d64-483e-9ee5-fb8cf476897b',
 '726efd1b-e79f-4c82-8c32-f37e25bde427',
 '7933283e-2232-4dda-a111-bb1e0b7551e1',
 '05a54859-1192-4bb4-8db6-6a88d2aa0043',
 '52fbdd0c-1726-4cfb-8317-fb4b1559643d',
 'd35c6ea5-3ad3-4979-9b19-2bda297be074',
 'c76d6759-edf4-4eaa-bc64-0145471f7272',
 '3cc0d538-c4f5-4cab-be69-d732262e4046',
 '2c3cccb6-09a4-4bfa-b278-525d65b68721',
 'bf2b7764-7b0e-443d-aa8d-154994108370',
 '9c2fc83a-2a26-

'b7500d61-9011-4db6-a86b-a3c521f2ed3e\n98161b86-953e-4a4f-8fc6-a8f0ab6184c0\nd8bcafc6-b2c1-4607-8a35-574f5e621229\nc0ea8dda-38fb-46bb-8e21-09bfc46d9022\n84b81d57-c8ca-4a55-a280-d4f164dc5fd9\n44974a95-4e06-4189-b4fa-b1c6970c6e49\n9f31dffc-9762-4c7d-b635-217fa5ddf32a\n9b2d5fd9-845c-4b02-bfe4-bc1a636c4613\n68c873c1-2df7-4552-84b8-b54c28953a82\n5f4579a6-4fab-4f5e-9363-41abea96dd2e\n'

"disabled_db_uuids=['b7500d61-9011-4db6-a86b-a3c521f2ed3e', '98161b86-953e-4a4f-8fc6-a8f0ab6184c0', 'd8bcafc6-b2c1-4607-8a35-574f5e621229', 'c0ea8dda-38fb-46bb-8e21-09bfc46d9022', '84b81d57-c8ca-4a55-a280-d4f164dc5fd9', '44974a95-4e06-4189-b4fa-b1c6970c6e49', '9f31dffc-9762-4c7d-b635-217fa5ddf32a', '9b2d5fd9-845c-4b02-bfe4-bc1a636c4613', '68c873c1-2df7-4552-84b8-b54c28953a82', '5f4579a6-4fab-4f5e-9363-41abea96dd2e']"

'Error: The datablob has already been deleted.\n'

'Error: The datablob has already been deleted.\n'

In [None]:
# Tests for datablob rm
# Testing negative scenario. Deleting a datablob using a uuid the server does not recognise
with set_airt_service_token_envvar():
    result = runner.invoke(app, ["rm", RANDOM_UUID_FOR_TESTING, "-q"])

    display(result.stdout)
    # The command must report a failure rather than exit successfully
    assert result.exit_code != 0, f"{result.stdout=}, {result.exit_code=}"

'Error: The datablob uuid is incorrect. Please try again.\n'

In [None]:
#| exporti


@app.command()
@helper.display_formated_table
@helper.requires_auth_token
def tag(
    uuid: str = typer.Option(
        ..., "--datablob_uuid", "-uuid", help="Datablob uuid in the server."
    ),
    name: str = typer.Option(..., "--name", "-n", help="A string to tag the datablob."),
    format: Optional[str] = typer.Option(
        None, "--format", "-f", help="Format output and show only the given column(s) values."
    ),
    debug: bool = typer.Option(
        False, "--debug", "-d", help="Set logger level to DEBUG and output everything."
    ),
) -> Dict["str", Union[pd.DataFrame, str]]:
    """Tag an existing datablob in the server."""
    # NOTE: the docstring above is rendered as the command's --help description.
    # `format` and `debug` are not read in this body.
    # NOTE(review): presumably they are consumed by the decorators - confirm in airt.cli.helper.

    from airt.client import DataBlob

    tagged_df = DataBlob(uuid=uuid).tag(name=name)

    # Swap the raw values for human-readable ones before the table is displayed
    humanizers = (
        ("pulled_on", helper.humanize_date),
        ("folder_size", helper.humanize_size),
    )
    for column, humanize in humanizers:
        tagged_df[column] = humanize(tagged_df[column])

    return {"df": tagged_df}

In [None]:
# Help check for the "tag" command. `assert_has_help` is defined elsewhere in this
# notebook; the cell output shows the rendered usage text.
assert_has_help(["tag"])

'Usage: root tag [OPTIONS]\n\n  Tag an existing datablob in the server.\n\nOptions:\n  -uuid, --datablob_uuid TEXT  Datablob uuid in the server.  [required]\n  -n, --name TEXT              A string to tag the datablob.  [required]\n  -f, --format TEXT            Format output and show only the given column(s)\n                               values.\n  -d, --debug                  Set logger level to DEBUG and output everything.\n  --help                       Show this message and exit.\n'

In [None]:
# Tests for tag
# Testing positive scenario

with set_airt_service_token_envvar(), generate_db(force_create=True) as db:
    db_uuid = db.uuid

    # Tag the datablob and show only the uuid and tags columns
    format_str = "{'datablob_uuid': '{}', 'tags': '{}'}"
    tag_args = ["tag", "-uuid", db_uuid, "-n", "v1.1.0", "--format", format_str]
    result = runner.invoke(app, tag_args)

    display(result.stdout)

    assert result.exit_code == 0
    # The new tag must be listed in the printed table
    assert "v1.1.0" in str(result.stdout)

"_db.uuid='615b3357-e598-4a3e-a303-4483e6b3a092'"

100%|██████████| 1/1 [00:15<00:00, 15.18s/it]


'datablob_uuid                         tags\n615b3357-e598-4a3e-a303-4483e6b3a092  latest, v1.1.0\n'

In [None]:
#| exporti


@app.command("from-local")
@helper.requires_auth_token
def from_local(
    path: str = typer.Option(
        ...,
        "--path",
        "-p",
        help="The relative or absolute path to a local CSV/parquet file or to a directory containing the CSV/parquet files.",
    ),
    cloud_provider: Optional[str] = typer.Option(
        None,
        "--cloud-provider",
        "-cp",
        help="The destination cloud storage provider's name to store the datablob. Currently, the API only supports **aws** and **azure** as cloud storage providers. If **None** (default value), then **aws**  will be used as the cloud storage provider.",
    ),
    region: Optional[str] = typer.Option(
        None,
        "--region",
        "-r",
        help="The destination cloud provider's region to save your datablob. If **None** (default value) then the default region will be assigned based on the cloud provider. " \
            "In the case of **aws**, **eu-west-1** will be used and in the case of **azure**, **westeurope** will be used. " \
            "The supported AWS regions are: ap-northeast-1, ap-northeast-2, ap-south-1, ap-southeast-1, ap-southeast-2, ca-central-1, eu-central-1, " \
            "eu-north-1, eu-west-1, eu-west-2, eu-west-3, sa-east-1, us-east-1, us-east-2, us-west-1, us-west-2. The supported Azure Blob Storage " \
            "regions are: australiacentral, australiacentral2, australiaeast, australiasoutheast, brazilsouth, canadacentral, canadaeast, centralindia, " \
            "centralus, eastasia, eastus, eastus2, francecentral, francesouth, germanynorth, germanywestcentral, japaneast, japanwest, koreacentral, koreasouth, " \
            "northcentralus, northeurope, norwayeast, norwaywest, southafricanorth, southafricawest, southcentralus, southeastasia, southindia, switzerlandnorth, " \
            "switzerlandwest, uaecentral, uaenorth, uksouth, ukwest, westcentralus, westeurope, westindia, westus, westus2.",
    ),
    tag: Optional[str] = typer.Option(
        None,
        "--tag",
        "-t",
        help="A string to tag the datablob. If not passed, then the tag **latest** will be assigned to the datablob.",
    ),
    quiet: bool = typer.Option(
        False,
        "--quiet",
        "-q",
        help="Output datablob uuid only.",
    ),
    debug: bool = typer.Option(
        False,
        "--debug",
        "-d",
        help="Set logger level to DEBUG and output everything.",
    ),
) -> None:
    """Create and return a datablob from local CSV/Parquet file(s).

    The API currently allows users to create datablobs from CSV or Parquet files. We intend to support additional file formats in future releases.
    """
    # NOTE: the docstring above is rendered as the command's --help description.
    # `debug` is not read in this body.
    # NOTE(review): presumably it is consumed by the decorator - confirm in airt.cli.helper.

    from airt.client import DataBlob

    if quiet:
        # Quiet mode must print nothing but the uuid, so the progress display is switched off.
        db = DataBlob.from_local(path=path, cloud_provider=cloud_provider, region=region, tag=tag, show_progress=False)
        typer.echo(f"{db.uuid}")
    else:
        # show_progress is left at the library default here.
        db = DataBlob.from_local(path=path, cloud_provider=cloud_provider, region=region, tag=tag)
        typer.echo(f"Successfully pulled the datablob uuid: {db.uuid}.")

In [None]:
# Help check for the "from-local" command. `assert_has_help` is defined elsewhere in
# this notebook; the cell output shows the rendered usage text.
assert_has_help(["from-local"])

"Usage: root from-local [OPTIONS]\n\n  Create and return a datablob from local csv file.\n\n  The API currently allows users to create datablobs from CSV or Parquet files.\n  We intend to support additional file formats in future releases.\n\nOptions:\n  -p, --path TEXT             The relative or absolute path to a local\n                              CSV/parquet file or to a directory containing the\n                              CSV/parquet files.  [required]\n  -cp, --cloud-provider TEXT  The destination cloud storage provider's name to\n                              store the datablob. Currently, the API only\n                              supports **aws** and **azure** as cloud storage\n                              providers. If **None** (default value), then\n                              **aws**  will be used as the cloud storage\n                              provider.\n  -r, --region TEXT           The destination cloud provider's region to save\n                            

In [None]:
#| include: false

# Helper function to download a sample csv file into the temp directory for testing Datablob local csv command

def get_test_csv_path() -> Path:
    """Downloads the account_312571_events from the s3 bucket and stores it in temp folder. 
    Finally converts the downloaded account_312571_events files to a csv file and returns the
    path of the temp folder and the temp csv file.
    """
    temp_dirpath = Path(tempfile.mkdtemp(prefix="test_s3_download_"))

    !aws s3 sync {TEST_S3_URI} {temp_dirpath / "parquet"}

    parquet_path = Path(temp_dirpath / "parquet")
    csv_dirpath = Path(temp_dirpath / "csv")
    os.mkdir(csv_dirpath)
    
    for i, f in enumerate(list(parquet_path.glob("*.parquet"))):
        df = pd.read_parquet(f)
        df.to_csv(csv_dirpath / f"file-{i}.csv", index=False)
    
    display(list(csv_dirpath.glob("*")))

    return temp_dirpath, csv_dirpath

# Testing multiple files upload.

# Create temp directory
temp_dir, csv_dirpath = get_test_csv_path()

try:
    # Creating a new datablob from the directory holding the CSV files
    cmd = [
        "from-local",
        "--path",
        f"{csv_dirpath}"
    ]

    cmd_q = [
        "from-local",
        "--path",
        f"{csv_dirpath}",
        "-q"
    ]

    with set_airt_service_token_envvar():

        # Without quiet
        result = runner.invoke(app, cmd)

        display(result.stdout)
        assert "Successfully pulled the datablob uuid:" in str(result.stdout)

        # With quiet
        result = runner.invoke(app, cmd_q)
        db_uuid = result.stdout[:-1]
        display(db_uuid)
        assert len(remove_hypens_from_id(db_uuid)) == 32

        # No --region was passed, so the details must show the default region
        result = runner.invoke(app, ["details", db_uuid])
        display(result.stdout)
        assert result.exit_code == 0
        assert "eu-west-1" in result.stdout
finally:
    # Deleting the temp directory, even when one of the assertions above failed
    shutil.rmtree(temp_dir)

display(f"{temp_dir.exists()=}")
assert not temp_dir.exists()

download: s3://test-airt-service/ecommerce_behavior_notebooks/_common_metadata to ../../../tmp/test_s3_download_m9lr4eeg/parquet/_common_metadata
download: s3://test-airt-service/ecommerce_behavior_notebooks/_metadata to ../../../tmp/test_s3_download_m9lr4eeg/parquet/_metadata
download: s3://test-airt-service/ecommerce_behavior_notebooks/part.0.parquet to ../../../tmp/test_s3_download_m9lr4eeg/parquet/part.0.parquet
download: s3://test-airt-service/ecommerce_behavior_notebooks/part.13.parquet to ../../../tmp/test_s3_download_m9lr4eeg/parquet/part.13.parquet
download: s3://test-airt-service/ecommerce_behavior_notebooks/part.15.parquet to ../../../tmp/test_s3_download_m9lr4eeg/parquet/part.15.parquet
download: s3://test-airt-service/ecommerce_behavior_notebooks/part.10.parquet to ../../../tmp/test_s3_download_m9lr4eeg/parquet/part.10.parquet
download: s3://test-airt-service/ecommerce_behavior_notebooks/part.14.parquet to ../../../tmp/test_s3_download_m9lr4eeg/parquet/part.14.parquet
down

[Path('/tmp/test_s3_download_m9lr4eeg/csv/file-1.csv'),
 Path('/tmp/test_s3_download_m9lr4eeg/csv/file-15.csv'),
 Path('/tmp/test_s3_download_m9lr4eeg/csv/file-6.csv'),
 Path('/tmp/test_s3_download_m9lr4eeg/csv/file-8.csv'),
 Path('/tmp/test_s3_download_m9lr4eeg/csv/file-17.csv'),
 Path('/tmp/test_s3_download_m9lr4eeg/csv/file-19.csv'),
 Path('/tmp/test_s3_download_m9lr4eeg/csv/file-18.csv'),
 Path('/tmp/test_s3_download_m9lr4eeg/csv/file-5.csv'),
 Path('/tmp/test_s3_download_m9lr4eeg/csv/file-14.csv'),
 Path('/tmp/test_s3_download_m9lr4eeg/csv/file-0.csv'),
 Path('/tmp/test_s3_download_m9lr4eeg/csv/file-10.csv'),
 Path('/tmp/test_s3_download_m9lr4eeg/csv/file-7.csv'),
 Path('/tmp/test_s3_download_m9lr4eeg/csv/file-12.csv'),
 Path('/tmp/test_s3_download_m9lr4eeg/csv/file-4.csv'),
 Path('/tmp/test_s3_download_m9lr4eeg/csv/file-2.csv'),
 Path('/tmp/test_s3_download_m9lr4eeg/csv/file-3.csv'),
 Path('/tmp/test_s3_download_m9lr4eeg/csv/file-16.csv'),
 Path('/tmp/test_s3_download_m9lr4eeg/cs

'\r  0%|          | 0/20 [00:00<?, ?it/s]\r  5%|▌         | 1/20 [00:01<00:22,  1.20s/it]\r 10%|█         | 2/20 [00:02<00:21,  1.19s/it]\r 15%|█▌        | 3/20 [00:03<00:19,  1.14s/it]\r 20%|██        | 4/20 [00:04<00:18,  1.13s/it]\r 25%|██▌       | 5/20 [00:05<00:16,  1.12s/it]\r 30%|███       | 6/20 [00:06<00:15,  1.10s/it]\r 35%|███▌      | 7/20 [00:07<00:14,  1.13s/it]\r 40%|████      | 8/20 [00:08<00:13,  1.10s/it]\r 45%|████▌     | 9/20 [00:10<00:12,  1.10s/it]\r 50%|█████     | 10/20 [00:11<00:10,  1.10s/it]\r 55%|█████▌    | 11/20 [00:12<00:09,  1.09s/it]\r 60%|██████    | 12/20 [00:13<00:08,  1.12s/it]\r 65%|██████▌   | 13/20 [00:14<00:07,  1.11s/it]\r 70%|███████   | 14/20 [00:15<00:06,  1.13s/it]\r 75%|███████▌  | 15/20 [00:16<00:05,  1.13s/it]\r 80%|████████  | 16/20 [00:18<00:04,  1.17s/it]\r 85%|████████▌ | 17/20 [00:19<00:03,  1.15s/it]\r 90%|█████████ | 18/20 [00:20<00:02,  1.15s/it]\r 95%|█████████▌| 19/20 [00:21<00:01,  1.15s/it]\r100%|██████████| 20/20 [00:22<00:00

'b68d348a-0001-48ee-b04a-fc51ca83061b'

'datablob_uuid                         datasource_uuids    type    source                                    region     cloud_provider    tags    pulled_on    folder_size    user_uuid                             error    disabled    ready\nb68d348a-0001-48ee-b04a-fc51ca83061b  <none>              local   local:/tmp/test_s3_download_m9lr4eeg/csv  eu-west-1  aws               latest  None         unknown        06a385d1-66a1-4ffc-8306-7f5821902fcc  <none>   False       False\n'

'temp_dir.exists()=False'

In [None]:
# Testing single file upload.

# Create temp directory
temp_dir, csv_dirpath = get_test_csv_path()

try:
    # Creating a new datablob from one of the CSV files
    cmd = [
        "from-local",
        "--path", str(csv_dirpath / "file-1.csv")
    ]

    cmd_q = [
        "from-local",
        "--path", str(csv_dirpath / "file-1.csv"),
        "-q"
    ]

    with set_airt_service_token_envvar():

        # Without quiet
        result = runner.invoke(app, cmd)

        display(result.stdout)
        assert "Successfully pulled the datablob uuid:" in str(result.stdout)

        # With quiet
        result = runner.invoke(app, cmd_q)

        display(result.stdout)
        assert len(remove_hypens_from_id(result.stdout[:-1])) == 32
finally:
    # Deleting the temp directory, even when one of the assertions above failed
    shutil.rmtree(temp_dir)

display(f"{temp_dir.exists()=}")
assert not temp_dir.exists()

download: s3://test-airt-service/ecommerce_behavior_notebooks/_common_metadata to ../../../tmp/test_s3_download_lwo2sndj/parquet/_common_metadata
download: s3://test-airt-service/ecommerce_behavior_notebooks/_metadata to ../../../tmp/test_s3_download_lwo2sndj/parquet/_metadata
download: s3://test-airt-service/ecommerce_behavior_notebooks/part.0.parquet to ../../../tmp/test_s3_download_lwo2sndj/parquet/part.0.parquet
download: s3://test-airt-service/ecommerce_behavior_notebooks/part.1.parquet to ../../../tmp/test_s3_download_lwo2sndj/parquet/part.1.parquet
download: s3://test-airt-service/ecommerce_behavior_notebooks/part.10.parquet to ../../../tmp/test_s3_download_lwo2sndj/parquet/part.10.parquet
download: s3://test-airt-service/ecommerce_behavior_notebooks/part.15.parquet to ../../../tmp/test_s3_download_lwo2sndj/parquet/part.15.parquet
download: s3://test-airt-service/ecommerce_behavior_notebooks/part.12.parquet to ../../../tmp/test_s3_download_lwo2sndj/parquet/part.12.parquet
downlo

[Path('/tmp/test_s3_download_lwo2sndj/csv/file-1.csv'),
 Path('/tmp/test_s3_download_lwo2sndj/csv/file-15.csv'),
 Path('/tmp/test_s3_download_lwo2sndj/csv/file-6.csv'),
 Path('/tmp/test_s3_download_lwo2sndj/csv/file-8.csv'),
 Path('/tmp/test_s3_download_lwo2sndj/csv/file-17.csv'),
 Path('/tmp/test_s3_download_lwo2sndj/csv/file-19.csv'),
 Path('/tmp/test_s3_download_lwo2sndj/csv/file-18.csv'),
 Path('/tmp/test_s3_download_lwo2sndj/csv/file-5.csv'),
 Path('/tmp/test_s3_download_lwo2sndj/csv/file-14.csv'),
 Path('/tmp/test_s3_download_lwo2sndj/csv/file-0.csv'),
 Path('/tmp/test_s3_download_lwo2sndj/csv/file-10.csv'),
 Path('/tmp/test_s3_download_lwo2sndj/csv/file-7.csv'),
 Path('/tmp/test_s3_download_lwo2sndj/csv/file-12.csv'),
 Path('/tmp/test_s3_download_lwo2sndj/csv/file-4.csv'),
 Path('/tmp/test_s3_download_lwo2sndj/csv/file-2.csv'),
 Path('/tmp/test_s3_download_lwo2sndj/csv/file-3.csv'),
 Path('/tmp/test_s3_download_lwo2sndj/csv/file-16.csv'),
 Path('/tmp/test_s3_download_lwo2sndj/cs

'\r  0%|          | 0/1 [00:00<?, ?it/s]\r100%|██████████| 1/1 [00:01<00:00,  1.18s/it]\r100%|██████████| 1/1 [00:01<00:00,  1.18s/it]\nSuccessfully pulled the datablob uuid: 5141bb5a-bd08-4743-8d0e-3d0569cb3d4d.\n'

'6f6dd429-7051-4cdc-ba75-53079e65749c\n'

'temp_dir.exists()=False'

In [None]:
#| exporti


@app.command("from-clickhouse")
@helper.requires_auth_token
def from_clickhouse(
    # --- required connection / schema options ---
    host: str = typer.Option(..., help="Remote database host name."),
    database: str = typer.Option(..., help="Database name."),
    table: str = typer.Option(..., help="Table name."),
    protocol: str = typer.Option(..., help='Protocol to use. The valid values are "native" and "http".'),
    index_column: str = typer.Option(
        ..., help="The column to use as index (row labels)."
    ),
    timestamp_column: str = typer.Option(..., help="Timestamp column name in the table."),
    # --- optional connection options ---
    port: int = typer.Option(
        0,
        help="Host port number. If not passed, then the default value **0** will be used.",
    ),
    # --- destination storage options ---
    cloud_provider: Optional[str] = typer.Option(
        None,
        "--cloud-provider",
        "-cp",
        help="The destination cloud storage provider's name to store the datablob. Currently, the API only supports **aws** and **azure** as cloud storage providers. If **None** (default value), then **aws**  will be used as the cloud storage provider.",
    ),
    region: Optional[str] = typer.Option(
        None,
        "--region",
        "-r",
        # Adjacent string literals are concatenated by the parser; no line
        # continuation characters are needed inside the call parentheses.
        help="The destination cloud provider's region to save your datablob. If **None** (default value) then the default region will be assigned based on the cloud provider. "
        "In the case of **aws**, **eu-west-1** will be used and in the case of **azure**, **westeurope** will be used. "
        "The supported AWS regions are: ap-northeast-1, ap-northeast-2, ap-south-1, ap-southeast-1, ap-southeast-2, ca-central-1, eu-central-1, "
        "eu-north-1, eu-west-1, eu-west-2, eu-west-3, sa-east-1, us-east-1, us-east-2, us-west-1, us-west-2. The supported Azure Blob Storage "
        "regions are: australiacentral, australiacentral2, australiaeast, australiasoutheast, brazilsouth, canadacentral, canadaeast, centralindia, "
        "centralus, eastasia, eastus, eastus2, francecentral, francesouth, germanynorth, germanywestcentral, japaneast, japanwest, koreacentral, koreasouth, "
        "northcentralus, northeurope, norwayeast, norwaywest, southafricanorth, southafricawest, southcentralus, southeastasia, southindia, switzerlandnorth, "
        "switzerlandwest, uaecentral, uaenorth, uksouth, ukwest, westcentralus, westeurope, westindia, westus, westus2.",
    ),
    # --- database credentials ---
    username: Optional[str] = typer.Option(
        None,
        "--username",
        "-u",
        help="Database username. If not passed, the default value 'root' will be used unless the value is explicitly set in the environment variable **CLICKHOUSE_USERNAME**.",
    ),
    password: Optional[str] = typer.Option(
        None,
        "--password",
        "-p",
        help="Database password. If not passed, the default value '' will be used unless the value is explicitly set in the environment variable **CLICKHOUSE_PASSWORD**.",
    ),
    # --- import tuning ---
    filters_json: Optional[str] = typer.Option(
        None,
        "--filters-json",
        "-f",
        help="Additional parameters to be used when importing data. For example, if you want to filter and extract data only for a specific user_id, pass "
        "'{\"user_id\": 1}'.",
    ),
    tag: Optional[str] = typer.Option(
        None,
        "--tag",
        "-t",
        help="A string to tag the datablob. If not passed, then the tag **latest** will be assigned to the datablob.",
    ),
    # --- output control ---
    quiet: bool = typer.Option(
        False,
        "--quiet",
        "-q",
        help="Output datablob uuid only.",
    ),
    debug: bool = typer.Option(
        False,
        "--debug",
        "-d",
        help="Set logger level to DEBUG and output everything.",
    ),
):
    """Create and return a datablob that encapsulates the data from a ClickHouse database.

    If the database requires authentication, pass the username/password as commandline arguments or store it in
    the **CLICKHOUSE_USERNAME** and **CLICKHOUSE_PASSWORD** environment variables.
    """
    # NOTE: typer renders the docstring above verbatim as the command's `--help`
    # text, so per-parameter documentation lives in the `help=` strings of the
    # options rather than in an "Args:" section here.
    #
    # NOTE(review): `debug` is not read anywhere in this body; presumably it is
    # consumed by the `helper.requires_auth_token` decorator -- TODO confirm.

    # --filters-json carries a JSON document; an omitted (or empty) value is
    # forwarded as None, i.e. no filters.
    filters = json.loads(filters_json) if filters_json else None

    # Function-scope import, kept as in the original cell: the exported module
    # only imports `Client` from `airt.client` at the top level.
    from airt.client import DataBlob

    # Every CLI option except quiet/debug is forwarded unchanged to the client.
    db = DataBlob.from_clickhouse(
        host=host,
        database=database,
        table=table,
        protocol=protocol,
        index_column=index_column,
        timestamp_column=timestamp_column,
        port=port,
        username=username,
        password=password,
        filters=filters,
        cloud_provider=cloud_provider,
        region=region,
        tag=tag,
    )

    if quiet:
        # Quiet mode: call db.wait() first (presumably blocks until the pull
        # has finished -- verify against the client), then print only the uuid
        # so that the output can be consumed by scripts.
        db.wait()
        typer.echo(f"{db.uuid}")
    else:
        # Verbose mode: announce the uuid up front, then show a progress bar.
        typer.echo(f"Pulling datablob uuid: {db.uuid}")
        db.progress_bar()

In [None]:
# Smoke test for the `from-clickhouse` help text, using the notebook's
# `assert_has_help` helper (defined in an earlier cell, not shown here;
# presumably it invokes the command with `--help` -- verify against the helper).
assert_has_help(["from-clickhouse"])

'Usage: root from-clickhouse [OPTIONS]\n\n  Create and return a datablob that encapsulates the data from a ClickHouse\n  database.\n\n  If the database requires authentication, pass the username/password as\n  commandline arguments or store it in the **CLICKHOUSE_USERNAME** and\n  **CLICKHOUSE_PASSWORD** environment variables.\n\nOptions:\n  --host TEXT                 Remote database host name.  [required]\n  --database TEXT             Database name.  [required]\n  --table TEXT                Table name.  [required]\n  --protocol TEXT             Protocol to use. The valid values are "native" and\n                              "http".  [required]\n  --index-column TEXT         The column to use as index (row labels).\n                              [required]\n  --timestamp-column TEXT     Timestamp column name in the tabel.  [required]\n  --port INTEGER              Host port number. If not passed, then the default\n                              value **0** will be used.  [default: 0

In [None]:
# tests for from-clickhouse. Testing positive scenario.

# Index os.environ directly: a missing variable then fails fast with a KeyError
# naming it, instead of silently placing None into the argument list.
cmd = [
    "from-clickhouse",
    "--host", os.environ["CLICKHOUSE_HOST"],
    "--database", os.environ["CLICKHOUSE_DATABASE"],
    "--table", os.environ["CLICKHOUSE_EVENTS_TABLE"],
    "--protocol", "native",
    "--index-column", "PersonId",
    "--timestamp-column", "OccurredTimeTicks",
    "--filters-json", '{"AccountId": 312571}'
]

# (extra CLI arguments, region expected in the `details` output).
# Without an explicit provider/region the documented default is aws/eu-west-1.
quiet_scenarios = [
    ([], "eu-west-1"),
    (["-cp", "aws", "--region", "eu-west-3"], "eu-west-3"),
]


with set_airt_service_token_envvar():
    # Without quiet (verbose)
    result = runner.invoke(app, cmd)
    display(result.stdout)
    assert result.exit_code == 0, result.stdout
    assert "Pulling datablob uuid: " in result.stdout

    # With quiet: stdout must contain nothing but the datablob uuid
    for extra_args, expected_region in quiet_scenarios:
        result = runner.invoke(app, cmd + extra_args + ["-q"])
        assert result.exit_code == 0, result.stdout
        # strip() instead of [:-1] so the check does not depend on the exact
        # trailing newline sequence.
        db_uuid = result.stdout.strip()
        display(db_uuid)
        assert len(remove_hypens_from_id(db_uuid)) == 32

        # The datablob must have been stored in the expected region
        result = runner.invoke(app, ["details", db_uuid])
        display(result.stdout)
        assert result.exit_code == 0
        assert expected_region in result.stdout

'Pulling datablob uuid: ba17d649-00da-400c-b610-06be7a53085e\n\r  0%|          | 0/1 [00:00<?, ?it/s]\r  0%|          | 0/1 [00:05<?, ?it/s]\r  0%|          | 0/1 [00:10<?, ?it/s]\r  0%|          | 0/1 [00:15<?, ?it/s]\r100%|██████████| 1/1 [00:20<00:00,  5.05s/it]\r100%|██████████| 1/1 [00:20<00:00, 20.24s/it]\n'

'be2e0852-a938-4c60-aaa9-0027d25cadd7'

'datablob_uuid                         datasource_uuids    type    source                                                        region     cloud_provider    tags    pulled_on      folder_size    user_uuid                             error    disabled    ready\nbe2e0852-a938-4c60-aaa9-0027d25cadd7  <none>              db      clickhouse+native://35.158.134.25:0/infobip/airt_training_3m  eu-west-1  aws               latest  2 seconds ago  8.9 MB         06a385d1-66a1-4ffc-8306-7f5821902fcc  <none>   False       True\n'

'a30f79f4-14ec-42f0-85f8-f9b1495e0be6'

'datablob_uuid                         datasource_uuids    type    source                                                        region     cloud_provider    tags    pulled_on      folder_size    user_uuid                             error    disabled    ready\na30f79f4-14ec-42f0-85f8-f9b1495e0be6  <none>              db      clickhouse+native://35.158.134.25:0/infobip/airt_training_3m  eu-west-3  aws               latest  3 seconds ago  8.9 MB         06a385d1-66a1-4ffc-8306-7f5821902fcc  <none>   False       True\n'

In [None]:
# tests for from-clickhouse. Testing negative scenario.

# Index os.environ directly: with os.environ.get() a missing CLICKHOUSE_HOST
# would put None into the argument list, the invocation would crash inside the
# runner and the bare exit-code check below could pass for the wrong reason.
cmd = [
    "from-clickhouse",
    "--host", os.environ["CLICKHOUSE_HOST"],
    "--database", "fake-database",
    "--table", "fake-table",
    "--protocol", "native",
    "--index-column", "PersonId",
    "--timestamp-column", "OccurredTimeTicks",
    "-f", '{"AccountId": 312571}'
]


with set_airt_service_token_envvar():
    # Without quiet (verbose)
    result = runner.invoke(app, cmd)
    display(result.stdout)
    assert result.exit_code == 1
    # The pull must have been started and then reported as failed; this
    # distinguishes the expected failure from an unrelated crash.
    assert "Pulling datablob uuid: " in result.stdout
    assert "Error: " in result.stdout

"Pulling datablob uuid: 9c24af99-f638-4abd-b9da-4a58cd20ccfa\n\r  0%|          | 0/1 [00:00<?, ?it/s]\r  0%|          | 0/1 [00:05<?, ?it/s]\r  0%|          | 0/1 [00:10<?, ?it/s]\nError: Orig exception: Code: 81.\nDB::Exception: Database `fake-database` doesn't exist. Stack trace:\n\n0. DB::Exception::Exception(std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&, int, bool) @ 0x8b6cbba in /usr/bin/cli\n"