In [None]:
# | default_exp data.csv

In [None]:
from airt.testing import activate_by_import

[INFO] airt.testing.activate_by_import: Testing environment activated.
[INFO] numexpr.utils: Note: NumExpr detected 16 cores but "NUMEXPR_MAX_THREADS" not set, so enforcing safe limit of 8.
[INFO] numexpr.utils: NumExpr defaulting to 8 threads.
[INFO] airt.keras.helpers: Using a single GPU #0 with memory_limit 1024 MB


In [None]:
# | export

import json
from typing import *

from airt.data.importers import import_csv
from airt.logger import get_logger
from airt.remote_path import RemotePath
from fastcore.script import Param, call_parse
from fastcore.utils import *
from sqlmodel import select

import airt_service.sanitizer
from airt_service.aws.utils import create_s3_datasource_path
from airt_service.azure.utils import create_azure_blob_storage_datasource_path
from airt_service.data.datasource import DataSource
from airt_service.data.utils import (
    calculate_azure_data_object_folder_size_and_path,
    calculate_data_object_folder_size_and_path,
    calculate_data_object_pulled_on,
)
from airt_service.db.models import DataBlob, get_session_with_context
from airt_service.helpers import truncate

[INFO] airt.data.importers: Module loaded:
[INFO] airt.data.importers:  - using pandas     : 1.5.1
[INFO] airt.data.importers:  - using dask       : 2022.10.0


In [None]:
from os import environ
from time import sleep

import dask.dataframe as dd
import pandas as pd
import requests

from airt_service.aws.utils import upload_to_s3_with_retry
from airt_service.constants import DS_HEAD_FILE_NAME, METADATA_FOLDER_PATH
from airt_service.data.datablob import FromLocalRequest, from_local_start_route
from airt_service.db.models import User, create_user_for_testing, get_session

[INFO] airt.executor.subcommand: Module loaded.


In [None]:
# Create a test user; later cells look the user up by the returned username.
test_username = create_user_for_testing()
display(test_username)

'exjcallcpz'

In [None]:
# | exporti

# Module-level logger, named after the current module.
logger = get_logger(__name__)

In [None]:
# | export


@call_parse  # type: ignore
def process_csv(
    datablob_id: Param("datablob_id", int),  # type: ignore
    datasource_id: Param("datasource_id", int),  # type: ignore
    *,
    deduplicate_data: Param("deduplicate_data", bool) = False,  # type: ignore
    index_column: Param("index_column", str),  # type: ignore
    sort_by: Param("sort_by", str),  # type: ignore
    blocksize: Param("blocksize", str) = "256MB",  # type: ignore
    kwargs_json: Param("kwargs_json", str) = "{}",  # type: ignore
) -> None:
    """Download the user uploaded CSV files of a datablob, run import_csv against them and upload the resulting parquet files to the datasource storage (S3 or Azure Blob Storage)

    Errors raised while processing are not propagated: they are logged and a
    truncated message is stored in the **error** attribute of the datasource.
    The datablob and the datasource are committed to the database in either case.

    Args:
        datablob_id: Datablob id
        datasource_id: Datasource id
        deduplicate_data: Flag requesting removal of duplicate rows (default value False).
            It is currently not forwarded to import_csv by this function.
        index_column: Name of the column to use as index and partition the data
        sort_by: Name of the column to sort data within the same index value. A JSON
            encoded value (e.g. a JSON list of column names) is decoded before use;
            any other string is passed on unchanged.
        blocksize: Size of partition
        kwargs_json: Parameters as json string which are passed to the **dask.dataframe.read_csv()** function,
            typically params for underlying **pd.read_csv()** from Pandas.
    """
    # NOTE(review): deduplicate_data is accepted but never used below - confirm
    # whether it is meant to be forwarded to import_csv.
    logger.info(
        f"process_csv({datablob_id=}, {datasource_id=}): processing user uploaded csv file for {datablob_id=} and uploading parquet back to S3 for {datasource_id=}"
    )
    with get_session_with_context() as session:
        datablob = session.exec(
            select(DataBlob).where(DataBlob.id == datablob_id)
        ).one()
        datasource = session.exec(
            select(DataSource).where(DataSource.id == datasource_id)
        ).one()

        # Following is needed if datablob was created from user uploaded csv files
        calculate_data_object_folder_size_and_path(datablob)

        # Clear the attributes that this run is going to (re)calculate
        datasource.error = None
        datasource.completed_steps = 0
        datasource.folder_size = None
        datasource.no_of_rows = None
        datasource.path = None
        datasource.hash = None

        try:
            source_path = datablob.path
            if datasource.cloud_provider == "aws":
                destination_bucket, s3_path = create_s3_datasource_path(
                    user_id=datasource.user.id,
                    datasource_id=datasource.id,
                    region=datasource.region,
                )
                destination_remote_url = f"s3://{destination_bucket.name}/{s3_path}"
            elif datasource.cloud_provider == "azure":
                (
                    destination_container_client,
                    destination_azure_blob_storage_path,
                ) = create_azure_blob_storage_datasource_path(
                    user_id=datasource.user.id,
                    datasource_id=datasource.id,
                    region=datasource.region,
                )
                destination_remote_url = f"{destination_container_client.url}/{destination_azure_blob_storage_path}"
            else:
                # Without this branch destination_remote_url would stay unbound and
                # the stored error would be an opaque UnboundLocalError message.
                raise ValueError(
                    f"Unsupported cloud provider: {datasource.cloud_provider}"
                )
            logger.info(
                f"process_csv({datablob_id=}, {datasource_id=}): step 1/4: downloading user uploaded file from bucket {source_path}"
            )

            # The source is only pulled (never pushed back)
            with RemotePath.from_url(
                remote_url=source_path,
                pull_on_enter=True,
                push_on_exit=False,
                exist_ok=True,
                parents=False,
            ) as source_s3_path:
                calculate_data_object_pulled_on(datasource)
                if not any(source_s3_path.as_path().iterdir()):
                    raise ValueError("Files not found")
                logger.info(
                    f"process_csv({datablob_id=}, {datasource_id=}): step 2/4: running import_csv()"
                )

                # The destination is only pushed (on leaving the with block)
                with RemotePath.from_url(
                    remote_url=destination_remote_url,
                    pull_on_enter=False,
                    push_on_exit=True,
                    exist_ok=True,
                    parents=True,
                ) as destination_path:
                    processed_path = destination_path.as_path()
                    kwargs = json.loads(kwargs_json)
                    try:
                        sort_by = json.loads(sort_by)
                    except json.JSONDecodeError:
                        # Not JSON: sort_by is a plain column name, use it as it is
                        pass
                    import_csv(
                        input_path=source_s3_path.as_path(),
                        output_path=processed_path,
                        index_column=index_column,
                        sort_by=sort_by,
                        blocksize=blocksize,
                        **kwargs,
                    )
                    datasource.calculate_properties(processed_path)

                    if not any(processed_path.glob("*.parquet")):
                        raise ValueError(
                            "processing failed; parquet files not available"
                        )

                    logger.info(
                        f"process_csv({datablob_id=}, {datasource_id=}): step 3/4: uploading parquet files back to path {destination_path}"
                    )
            logger.info(
                f"process_csv({datablob_id=}, {datasource_id=}): step 4/4: calculating datasource attributes - folder_size, no_of_rows, head, hash"
            )

            calculate_data_object_folder_size_and_path(datasource)
        except Exception as e:
            # Record the failure on the datasource instead of propagating it
            logger.error(
                f"process_csv({datablob_id=}, {datasource_id=}): error: {str(e)}"
            )
            datasource.error = truncate(str(e))
        logger.info(f"process_csv({datablob_id=}, {datasource_id=}): completed")
        session.add(datablob)
        session.add(datasource)
        session.commit()

In [None]:
# Create a csv datasource and upload single csv file using presigned url
with get_session_with_context() as session:
    user = session.exec(select(User).where(User.username == test_username)).one()
    from_local_request = FromLocalRequest(
        path="tmp/test-folder/", tag="my_csv_datasource_tag"
    )
    from_local_response = from_local_start_route(
        from_local_request=from_local_request,
        user=user,
        session=session,
    )

    with RemotePath.from_url(
        remote_url=f"s3://test-airt-service/account_312571_events",
        pull_on_enter=True,
        push_on_exit=False,
        exist_ok=True,
        parents=False,
        access_key=environ["AWS_ACCESS_KEY_ID"],
        secret_key=environ["AWS_SECRET_ACCESS_KEY"],
    ) as test_s3_path:
        df = pd.read_parquet(test_s3_path.as_path())
        display(df.head())
        df.to_csv(test_s3_path.as_path() / "file.csv", index=False)
        display(list(test_s3_path.as_path().glob("*")))
        !head -n 10 {test_s3_path.as_path()/"file.csv"}

        upload_to_s3_with_retry(
            test_s3_path.as_path() / "file.csv",
            from_local_response.presigned["url"],
            from_local_response.presigned["fields"],
        )

    datablob_id = (
        session.exec(select(DataBlob).where(DataBlob.uuid == from_local_response.uuid))
        .one()
        .id
    )
    datasource = DataSource(
        datablob_id=datablob_id,
        cloud_provider="aws",
        region="eu-west-1",
        total_steps=1,
        user=user,
    )
    session.add(datasource)
    session.commit()

    datasource_id = datasource.id
    user_id = user.id

[INFO] botocore.credentials: Found credentials in environment variables.
[INFO] airt_service.data.datablob: DataBlob.from_local(): FromLocalResponse(uuid=UUID('2a0a74eb-4fa2-4590-ba7b-a9b8b0f95934'), type='local', presigned={'url': 'https://kumaran-airt-service-eu-west-1.s3.amazonaws.com/', 'fields': {'key': '****************************************', 'AWSAccessKeyId': '********************', 'policy': '************************************************************************************************************************************************************************************************************************************************************', 'signature': '****************************'}})
[INFO] airt.remote_path: RemotePath.from_url(): creating remote path with the following url s3://test-airt-service/account_312571_events
[INFO] airt.remote_path: S3Path._create_cache_path(): created cache path: /tmp/s3test-airt-serviceaccount_312571_events_cached_2xbwdvga
[INFO] airt.remote

Unnamed: 0_level_0,AccountId,DefinitionId,OccurredTime,OccurredTimeTicks,PersonId
__null_dask_index__,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
0,312571,loadTests2,2019-12-31 21:30:02,1577836802678,2
1,312571,loadTests3,2020-01-03 23:53:22,1578104602678,2
2,312571,loadTests1,2020-01-07 02:16:42,1578372402678,2
3,312571,loadTests2,2020-01-10 04:40:02,1578640202678,2
4,312571,loadTests3,2020-01-13 07:03:22,1578908002678,2


[Path('/tmp/s3test-airt-serviceaccount_312571_events_cached_2xbwdvga/_metadata'),
 Path('/tmp/s3test-airt-serviceaccount_312571_events_cached_2xbwdvga/_common_metadata'),
 Path('/tmp/s3test-airt-serviceaccount_312571_events_cached_2xbwdvga/file.csv'),
 Path('/tmp/s3test-airt-serviceaccount_312571_events_cached_2xbwdvga/part.3.parquet'),
 Path('/tmp/s3test-airt-serviceaccount_312571_events_cached_2xbwdvga/part.0.parquet'),
 Path('/tmp/s3test-airt-serviceaccount_312571_events_cached_2xbwdvga/part.1.parquet'),
 Path('/tmp/s3test-airt-serviceaccount_312571_events_cached_2xbwdvga/part.4.parquet'),
 Path('/tmp/s3test-airt-serviceaccount_312571_events_cached_2xbwdvga/part.2.parquet')]

AccountId,DefinitionId,OccurredTime,OccurredTimeTicks,PersonId
312571,loadTests2,2019-12-31 21:30:02,1577836802678,2
312571,loadTests3,2020-01-03 23:53:22,1578104602678,2
312571,loadTests1,2020-01-07 02:16:42,1578372402678,2
312571,loadTests2,2020-01-10 04:40:02,1578640202678,2
312571,loadTests3,2020-01-13 07:03:22,1578908002678,2
312571,loadTests1,2020-01-16 09:26:42,1579175802678,2
312571,loadTests2,2020-01-19 11:50:02,1579443602678,2
312571,loadTests3,2020-01-22 14:13:22,1579711402678,2
312571,loadTests1,2020-01-25 16:36:42,1579979202678,2
[INFO] airt.remote_path: S3Path._clean_up(): removing local cache path /tmp/s3test-airt-serviceaccount_312571_events_cached_2xbwdvga


In [None]:
# Run process_csv (sort_by given as a JSON list) and verify the stored datasource
read_csv_kwargs = {
    "usecols": [0, 1, 2, 3, 4],
    "parse_dates": ["OccurredTime"],
}
process_csv(
    datablob_id=datablob_id,
    datasource_id=datasource_id,
    deduplicate_data=True,
    index_column="PersonId",
    sort_by=json.dumps(["OccurredTime"]),
    blocksize="256MB",
    kwargs_json=json.dumps(read_csv_kwargs),
)

with get_session_with_context() as session:
    datasource_query = select(DataSource).where(DataSource.id == datasource_id)
    datasource = session.exec(datasource_query).one()
    display(datasource)

    # Attributes calculated by process_csv
    assert datasource.folder_size == 6619982, datasource.folder_size
    assert datasource.no_of_rows == 498961
    expected_path = f"s3://{environ['STORAGE_BUCKET_PREFIX']}-eu-west-1/{user_id}/datasource/{datasource.id}"
    assert datasource.path == expected_path, datasource.path
    assert datasource.hash == "1dd8ee7a0f96a48110dec6e25891d18d", datasource.hash

    # tests for datasource head and dtypes
    with RemotePath.from_url(
        remote_url=f"{datasource.path}",
        pull_on_enter=True,
        push_on_exit=False,
        exist_ok=True,
        parents=False,
    ) as test_s3_info_path:
        processed_test_s3_info_path = test_s3_info_path.as_path()
        head_file = (
            processed_test_s3_info_path / METADATA_FOLDER_PATH / DS_HEAD_FILE_NAME
        )

        head_df = pd.read_parquet(head_file)
        assert head_df.index.name == "PersonId"
        assert head_df.shape == (10, 4)

        # Map every column of the head to the name of its dtype
        dtypes_dict = {column: dtype.name for column, dtype in head_df.dtypes.items()}
        expected_dtypes = {
            "AccountId": "int64",
            "DefinitionId": "object",
            "OccurredTime": "datetime64[ns]",
            "OccurredTimeTicks": "int64",
        }
        assert dtypes_dict == expected_dtypes, dtypes_dict

[INFO] __main__: process_csv(datablob_id=51, datasource_id=29): processing user uploaded csv file for datablob_id=51 and uploading parquet back to S3 for datasource_id=29
[INFO] __main__: process_csv(datablob_id=51, datasource_id=29): step 1/4: downloading user uploaded file from bucket s3://kumaran-airt-service-eu-west-1/132/datablob/51
[INFO] airt.remote_path: RemotePath.from_url(): creating remote path with the following url s3://kumaran-airt-service-eu-west-1/132/datablob/51
[INFO] airt.remote_path: S3Path._create_cache_path(): created cache path: /tmp/s3kumaran-airt-service-eu-west-1132datablob51_cached_ntxr64d4
[INFO] airt.remote_path: S3Path.__init__(): created object for accessing s3://kumaran-airt-service-eu-west-1/132/datablob/51 locally in /tmp/s3kumaran-airt-service-eu-west-1132datablob51_cached_ntxr64d4
[INFO] airt.remote_path: S3Path.__enter__(): pulling data from s3://kumaran-airt-service-eu-west-1/132/datablob/51 to /tmp/s3kumaran-airt-service-eu-west-1132datablob51_cac

DataSource(id=29, uuid=UUID('c98bd928-e0bf-4072-ac77-a6e9d2c8db8a'), hash='1dd8ee7a0f96a48110dec6e25891d18d', total_steps=1, completed_steps=1, folder_size=6619982, no_of_rows=498961, cloud_provider=<CloudProvider.aws: 'aws'>, region='eu-west-1', error=None, disabled=False, path='s3://kumaran-airt-service-eu-west-1/132/datasource/29', created=datetime.datetime(2022, 10, 27, 8, 10, 18), user_id=132, pulled_on=datetime.datetime(2022, 10, 27, 8, 10, 25), tags=[])

[INFO] airt.remote_path: RemotePath.from_url(): creating remote path with the following url s3://kumaran-airt-service-eu-west-1/132/datasource/29
[INFO] airt.remote_path: S3Path._create_cache_path(): created cache path: /tmp/s3kumaran-airt-service-eu-west-1132datasource29_cached_kx3y8q1r
[INFO] airt.remote_path: S3Path.__init__(): created object for accessing s3://kumaran-airt-service-eu-west-1/132/datasource/29 locally in /tmp/s3kumaran-airt-service-eu-west-1132datasource29_cached_kx3y8q1r
[INFO] airt.remote_path: S3Path.__enter__(): pulling data from s3://kumaran-airt-service-eu-west-1/132/datasource/29 to /tmp/s3kumaran-airt-service-eu-west-1132datasource29_cached_kx3y8q1r
[INFO] airt.remote_path: S3Path._clean_up(): removing local cache path /tmp/s3kumaran-airt-service-eu-west-1132datasource29_cached_kx3y8q1r


In [None]:
# Create a csv datasource and upload multiple csv files using presigned url
with get_session_with_context() as session:
    user = session.exec(select(User).where(User.username == test_username)).one()
    from_local_request = FromLocalRequest(
        path="tmp/test-folder/", tag="my_csv_datasource_tag"
    )
    from_local_response = from_local_start_route(
        from_local_request=from_local_request,
        user=user,
        session=session,
    )

    with RemotePath.from_url(
        remote_url=f"s3://test-airt-service/account_312571_events",
        pull_on_enter=True,
        push_on_exit=False,
        exist_ok=True,
        parents=False,
        access_key=environ["AWS_ACCESS_KEY_ID"],
        secret_key=environ["AWS_SECRET_ACCESS_KEY"],
    ) as test_s3_path:
        ddf = dd.read_parquet(test_s3_path.as_path())
        display(ddf.head())
        ddf.to_csv(test_s3_path.as_path() / "csv" / "file-*.csv", index=False)
        display(list((test_s3_path.as_path() / "csv").glob("*")))
        !head -n 10 {test_s3_path.as_path()/"csv"/"file-0.csv"}
        sleep(10)

        for csv_to_upload in sorted((test_s3_path.as_path() / "csv").glob("*.csv")):
            display(f"Uploading {csv_to_upload}")
            upload_to_s3_with_retry(
                csv_to_upload,
                from_local_response.presigned["url"],
                from_local_response.presigned["fields"],
            )

    datablob_id = (
        session.exec(select(DataBlob).where(DataBlob.uuid == from_local_response.uuid))
        .one()
        .id
    )
    datasource = DataSource(
        datablob_id=datablob_id,
        cloud_provider="aws",
        region="us-west-1",
        total_steps=1,
        user=user,
    )
    session.add(datasource)
    session.commit()

    datasource_id = datasource.id
    user_id = user.id

[INFO] airt_service.data.datablob: DataBlob.from_local(): FromLocalResponse(uuid=UUID('705db79a-c658-40d4-8160-4989eda1e326'), type='local', presigned={'url': 'https://kumaran-airt-service-eu-west-1.s3.amazonaws.com/', 'fields': {'key': '****************************************', 'AWSAccessKeyId': '********************', 'policy': '************************************************************************************************************************************************************************************************************************************************************', 'signature': '****************************'}})
[INFO] airt.remote_path: RemotePath.from_url(): creating remote path with the following url s3://test-airt-service/account_312571_events
[INFO] airt.remote_path: S3Path._create_cache_path(): created cache path: /tmp/s3test-airt-serviceaccount_312571_events_cached_1trnug7c
[INFO] airt.remote_path: S3Path.__init__(): created object for accessing s3://test-airt-ser

Unnamed: 0,AccountId,DefinitionId,OccurredTime,OccurredTimeTicks,PersonId
0,312571,loadTests2,2019-12-31 21:30:02,1577836802678,2
1,312571,loadTests3,2020-01-03 23:53:22,1578104602678,2
2,312571,loadTests1,2020-01-07 02:16:42,1578372402678,2
3,312571,loadTests2,2020-01-10 04:40:02,1578640202678,2
4,312571,loadTests3,2020-01-13 07:03:22,1578908002678,2


[Path('/tmp/s3test-airt-serviceaccount_312571_events_cached_1trnug7c/csv/file-4.csv'),
 Path('/tmp/s3test-airt-serviceaccount_312571_events_cached_1trnug7c/csv/file-3.csv'),
 Path('/tmp/s3test-airt-serviceaccount_312571_events_cached_1trnug7c/csv/file-2.csv'),
 Path('/tmp/s3test-airt-serviceaccount_312571_events_cached_1trnug7c/csv/file-0.csv'),
 Path('/tmp/s3test-airt-serviceaccount_312571_events_cached_1trnug7c/csv/file-1.csv')]

AccountId,DefinitionId,OccurredTime,OccurredTimeTicks,PersonId
312571,loadTests2,2019-12-31 21:30:02,1577836802678,2
312571,loadTests3,2020-01-03 23:53:22,1578104602678,2
312571,loadTests1,2020-01-07 02:16:42,1578372402678,2
312571,loadTests2,2020-01-10 04:40:02,1578640202678,2
312571,loadTests3,2020-01-13 07:03:22,1578908002678,2
312571,loadTests1,2020-01-16 09:26:42,1579175802678,2
312571,loadTests2,2020-01-19 11:50:02,1579443602678,2
312571,loadTests3,2020-01-22 14:13:22,1579711402678,2
312571,loadTests1,2020-01-25 16:36:42,1579979202678,2


'Uploading /tmp/s3test-airt-serviceaccount_312571_events_cached_1trnug7c/csv/file-0.csv'

'Uploading /tmp/s3test-airt-serviceaccount_312571_events_cached_1trnug7c/csv/file-1.csv'

'Uploading /tmp/s3test-airt-serviceaccount_312571_events_cached_1trnug7c/csv/file-2.csv'

'Uploading /tmp/s3test-airt-serviceaccount_312571_events_cached_1trnug7c/csv/file-3.csv'

'Uploading /tmp/s3test-airt-serviceaccount_312571_events_cached_1trnug7c/csv/file-4.csv'

[INFO] airt.remote_path: S3Path._clean_up(): removing local cache path /tmp/s3test-airt-serviceaccount_312571_events_cached_1trnug7c


In [None]:
# Run process_csv (sort_by given as a plain column name) and verify the stored datasource
read_csv_kwargs = {
    "usecols": [0, 1, 2, 3, 4],
    "parse_dates": ["OccurredTime"],
}
process_csv(
    datablob_id=datablob_id,
    datasource_id=datasource_id,
    deduplicate_data=True,
    index_column="PersonId",
    sort_by="OccurredTime",
    blocksize="256MB",
    kwargs_json=json.dumps(read_csv_kwargs),
)

with get_session_with_context() as session:
    datasource_query = select(DataSource).where(DataSource.id == datasource_id)
    datasource = session.exec(datasource_query).one()
    display(datasource)

    # Attributes calculated by process_csv
    assert datasource.folder_size == 6619982, datasource.folder_size
    assert datasource.no_of_rows == 498961
    expected_path = f"s3://{environ['STORAGE_BUCKET_PREFIX']}-us-west-1/{user_id}/datasource/{datasource.id}"
    assert datasource.path == expected_path, datasource.path
    assert datasource.hash == "1dd8ee7a0f96a48110dec6e25891d18d", datasource.hash

    # tests for datasource head and dtypes
    with RemotePath.from_url(
        remote_url=f"{datasource.path}",
        pull_on_enter=True,
        push_on_exit=False,
        exist_ok=True,
        parents=False,
    ) as test_s3_info_path:
        processed_test_s3_info_path = test_s3_info_path.as_path()
        head_file = (
            processed_test_s3_info_path / METADATA_FOLDER_PATH / DS_HEAD_FILE_NAME
        )

        head_df = pd.read_parquet(head_file)
        assert head_df.index.name == "PersonId"
        assert head_df.shape == (10, 4)

        # Map every column of the head to the name of its dtype
        dtypes_dict = {column: dtype.name for column, dtype in head_df.dtypes.items()}
        expected_dtypes = {
            "AccountId": "int64",
            "DefinitionId": "object",
            "OccurredTime": "datetime64[ns]",
            "OccurredTimeTicks": "int64",
        }
        assert dtypes_dict == expected_dtypes, dtypes_dict

[INFO] __main__: process_csv(datablob_id=53, datasource_id=32): processing user uploaded csv file for datablob_id=53 and uploading parquet back to S3 for datasource_id=32
[INFO] __main__: process_csv(datablob_id=53, datasource_id=32): step 1/4: downloading user uploaded file from bucket s3://kumaran-airt-service-eu-west-1/132/datablob/53
[INFO] airt.remote_path: RemotePath.from_url(): creating remote path with the following url s3://kumaran-airt-service-eu-west-1/132/datablob/53
[INFO] airt.remote_path: S3Path._create_cache_path(): created cache path: /tmp/s3kumaran-airt-service-eu-west-1132datablob53_cached_at_1drr7
[INFO] airt.remote_path: S3Path.__init__(): created object for accessing s3://kumaran-airt-service-eu-west-1/132/datablob/53 locally in /tmp/s3kumaran-airt-service-eu-west-1132datablob53_cached_at_1drr7
[INFO] airt.remote_path: S3Path.__enter__(): pulling data from s3://kumaran-airt-service-eu-west-1/132/datablob/53 to /tmp/s3kumaran-airt-service-eu-west-1132datablob53_cac

DataSource(id=32, uuid=UUID('0e742604-2180-4b49-9ff7-fb23f178cf44'), hash='1dd8ee7a0f96a48110dec6e25891d18d', total_steps=1, completed_steps=1, folder_size=6619982, no_of_rows=498961, cloud_provider=<CloudProvider.aws: 'aws'>, region='us-west-1', error=None, disabled=False, path='s3://kumaran-airt-service-us-west-1/132/datasource/32', created=datetime.datetime(2022, 10, 27, 8, 12, 43), user_id=132, pulled_on=datetime.datetime(2022, 10, 27, 8, 12, 52), tags=[])

[INFO] airt.remote_path: RemotePath.from_url(): creating remote path with the following url s3://kumaran-airt-service-us-west-1/132/datasource/32
[INFO] airt.remote_path: S3Path._create_cache_path(): created cache path: /tmp/s3kumaran-airt-service-us-west-1132datasource32_cached_5zkc9719
[INFO] airt.remote_path: S3Path.__init__(): created object for accessing s3://kumaran-airt-service-us-west-1/132/datasource/32 locally in /tmp/s3kumaran-airt-service-us-west-1132datasource32_cached_5zkc9719
[INFO] airt.remote_path: S3Path.__enter__(): pulling data from s3://kumaran-airt-service-us-west-1/132/datasource/32 to /tmp/s3kumaran-airt-service-us-west-1132datasource32_cached_5zkc9719
[INFO] airt.remote_path: S3Path._clean_up(): removing local cache path /tmp/s3kumaran-airt-service-us-west-1132datasource32_cached_5zkc9719
