In [None]:
import great_expectations as gx
import os

# Obtain the Great Expectations data context; every later cell in this notebook
# registers its datasource, suite and checkpoint through this object.
context = gx.get_context()

This notebook requires two environment variables to be set:
- `AZURE_STORAGE_ACCOUNT_URL`: the URL of the Storage Account that contains your data. More information can be found in the [Azure documentation](https://learn.microsoft.com/en-us/azure/storage/common/storage-account-overview).
- `AZURE_CREDENTIAL`: the credential string used to access that Storage Account.

In [None]:
# Register a data docs site backed by a filesystem store at a fixed, known path.
# The default is to write data docs to a temp directory; a stable location makes
# it simpler to host the rendered docs from within the containers.

data_docs_store_backend = {
    "class_name": "TupleFilesystemStoreBackend",
    "base_directory": "/gx/gx_stores/data_docs",
}
data_docs_site_config = {
    "class_name": "SiteBuilder",
    "store_backend": data_docs_store_backend,
    "site_index_builder": {"class_name": "DefaultSiteIndexBuilder"},
}
context.add_data_docs_site(
    site_name="local_site_for_hosting",
    site_config=data_docs_site_config,
)

In [None]:
# Register a pandas datasource that reads from Azure Blob Storage.
# The "${...}" values are passed through as literal placeholder strings naming
# the two environment variables this notebook requires; they are never read
# directly here (presumably Great Expectations substitutes them - confirm).
datasource_name = "pandas_abs_example"
azure_options = dict(
    account_url="${AZURE_STORAGE_ACCOUNT_URL}",
    credential="${AZURE_CREDENTIAL}",
)
datasource = context.sources.add_pandas_abs(
    azure_options=azure_options,
    name=datasource_name,
)

In [None]:
# Sanity check: the datasource added above must now be listed on the context.
assert datasource_name in context.datasources

In [None]:
# Location of the sample files: blob container and the name prefix to look under.
abs_container = "superconductive-public"
abs_name_starts_with = "data/taxi_yellow_tripdata_samples/"

# File-name pattern; the named groups `year` and `month` are captured from each
# matching file name.
batching_regex = r"yellow_tripdata_sample_(?P<year>\d{4})-(?P<month>\d{2})\.csv"

# Name under which the CSV asset is added to the datasource.
asset_name = "my_taxi_data_asset"

In [None]:
# Add a CSV asset to the datasource, scoped to the container/prefix configured
# above and using the batching regex to match file names.
data_asset = datasource.add_csv_asset(
    abs_container=abs_container,
    abs_name_starts_with=abs_name_starts_with,
    batching_regex=batching_regex,
    name=asset_name,
)

In [None]:
# Show which option keys the asset accepts when building a batch request.
request_options = data_asset.batch_request_options
print("data_asset.batch_request_options:", request_options)

In [None]:
# Request only the file whose name carries year 2019 and month 03.
selected_period = {"year": "2019", "month": "03"}
batch_request = data_asset.build_batch_request(selected_period)

In [None]:
# Resolve the batch request into concrete batches and report how many matched.
batches = data_asset.get_batch_list_from_batch_request(batch_request)
batch_count = len(batches)
print("len(batches):", batch_count)

In [None]:
# Create the expectation suite on the context, or update it if a suite with this
# name already exists. The call is the cell's last expression, so its return
# value is shown as the cell output.
expectation_suite_name = "my_expectation_suite"
context.add_or_update_expectation_suite(expectation_suite_name=expectation_suite_name)

In [None]:
# Bind the batch request to the expectation suite through a validator, then
# preview the first rows of the loaded batch as the cell output.
validator = context.get_validator(
    expectation_suite_name=expectation_suite_name,
    batch_request=batch_request,
)
validator.head()

In [None]:
# List the column names of the active batch's underlying dataframe.
batch_dataframe = validator.active_batch.data.dataframe
print("columns:", batch_dataframe.columns)

In [None]:
# Add two expectations through the validator. Only the second call's result is
# displayed, because it is the cell's last expression.
validator.expect_column_values_to_not_be_null("pickup_datetime")
# NOTE(review): no explicit min/max is given; auto=True presumably lets Great
# Expectations estimate the bounds from the batch - confirm against the GX docs.
validator.expect_column_values_to_be_between("passenger_count", auto=True)

In [None]:
# Save the suite built up on the validator. discard_failed_expectations=False
# asks for expectations to be kept even when they failed on this batch.
validator.save_expectation_suite(discard_failed_expectations=False)

In [None]:
# Create the checkpoint (or update it if the name already exists), built from
# the validator configured above.
checkpoint = context.add_or_update_checkpoint(
    validator=validator,
    name="my_quickstart_checkpoint",
)

In [None]:
# Execute the checkpoint and keep its result for inspection in the next cell.
checkpoint_result = checkpoint.run()

In [None]:
# Display the overall success flag of the checkpoint run as the cell output.
checkpoint_result.success