In [0]:
# Install the great_expectations package from PyPI through the Databricks
# library utility so that the import in the cells below can resolve.
# NOTE(review): no version argument is passed, so the installed release is
# whatever PyPI serves at run time — consider pinning a version for
# reproducible runs.
dbutils.library.installPyPI("great_expectations")

In [0]:
###################################################################

In [0]:
import great_expectations as ge
from great_expectations.data_context.types.base import DataContextConfig, DatasourceConfig, FilesystemStoreBackendDefaults
from great_expectations.data_context import BaseDataContext

# Spark datasource with a single "subdir_reader" batch-kwargs generator
# whose base directory is /FileStore/tables/.
subdir_reader_generator = {
    "class_name": "SubdirReaderBatchKwargsGenerator",
    "base_directory": "/FileStore/tables/",
}
spark_datasource_config = DatasourceConfig(
    class_name="SparkDFDatasource",
    batch_kwargs_generators={"subdir_reader": subdir_reader_generator},
)

# Project configuration: registers the datasource under the name
# "my_spark_datasource" and uses the filesystem store defaults rooted at
# /dbfs/FileStore/.
filesystem_store_defaults = FilesystemStoreBackendDefaults(
    root_directory="/dbfs/FileStore/"
)
data_context_config = DataContextConfig(
    datasources={"my_spark_datasource": spark_datasource_config},
    store_backend_defaults=filesystem_store_defaults,
)

# Data Context built purely from the in-code configuration above.
context = BaseDataContext(project_config=data_context_config)

In [0]:
from great_expectations.data_context import BaseDataContext

file_location = "/FileStore/tables/Building_Permits.csv"
file_type = "csv"

# CSV options
infer_schema = "true"
first_row_is_header = "true"
delimiter = ","

# Single source of truth for the suite name, used both when creating the
# suite and when requesting the batch.
expectation_suite_name = "validation_building_permits3"

# Read the file into a Spark DataFrame.
# The applied options are for CSV files. For other file types, these will be ignored.
df = (
    spark.read.format(file_type)
    .option("inferSchema", infer_schema)
    .option("header", first_row_is_header)
    .option("sep", delimiter)
    .load(file_location)
)

# NOTE: data_context_config is the DataContextConfig built in the
# configuration cell above.
context = BaseDataContext(project_config=data_context_config)

# overwrite_existing=True keeps this cell re-runnable: without it the call
# raises once a suite with this name has already been saved to the
# expectations store, which forces a rename on every re-run.
context.create_expectation_suite(expectation_suite_name, overwrite_existing=True)

# Wrap the in-memory DataFrame as a batch tied to the suite, so that the
# expectation calls in the following cells are made on `my_batch`.
my_batch = context.get_batch(
    {
        "dataset": df,
        "datasource": "my_spark_datasource",
    },
    expectation_suite_name,
)



In [0]:
# Expect the table to contain exactly 198900 rows.
my_batch.expect_table_row_count_to_equal(value=198900)

In [0]:
# Expect the table to contain exactly 1000 rows.
# NOTE(review): this contradicts the 198900-row expectation declared in the
# previous cell — presumably a deliberate failing example; confirm which
# value should end up in the saved suite.
my_batch.expect_table_row_count_to_equal(value=1000)

In [0]:
# Persist the batch's expectation suite; discard_failed_expectations=False
# asks for failed expectations to be kept rather than dropped on save.
# NOTE(review): the expectations in the cells below are declared after this
# save — presumably they are not persisted unless this call is run again;
# confirm the intended cell order.
my_batch.save_expectation_suite(discard_failed_expectations=False)

In [0]:
# Expect a column named "Street Number" to be present in the batch.
my_batch.expect_column_to_exist(column="Street Number")

In [0]:
# Expect a column named "Road Number" to be present in the batch.
my_batch.expect_column_to_exist(column="Road Number")

In [0]:
# Expect the table row count to fall within the given bounds (1 to 1000000).
my_batch.expect_table_row_count_to_be_between(min_value=1, max_value=1000000)