In [0]:
# Display the `spark` object as this cell's output.
# NOTE(review): `spark` is not defined anywhere in this notebook; presumably it is the
# SparkSession injected by the Databricks runtime — confirm before running elsewhere.
spark

In [0]:
# Install the great_expectations package from PyPI for this notebook session.
# NOTE(review): no version is pinned, so a re-run may pick up a different release than the
# one the cells below were written against. Also, `dbutils.library.installPyPI` is reported
# as removed in newer Databricks runtimes in favour of `%pip install` — confirm against the
# cluster's runtime version.
dbutils.library.installPyPI("great_expectations")

In [0]:
# List the datasources registered on the data context.
# NOTE(review): `context` is only assigned in a later cell of this notebook, so on a fresh
# kernel executed top-to-bottom this cell raises NameError. It should run after the cell
# that builds the BaseDataContext.
context.list_datasources()

In [0]:
################################

In [0]:
from great_expectations.data_context.types.base import DatasourceConfig
from great_expectations.core.batch import BatchRequest
from great_expectations.core.batch import RuntimeBatchRequest
from great_expectations.data_context.types.base import DataContextConfig, DatasourceConfig, FilesystemStoreBackendDefaults
from great_expectations.data_context import BaseDataContext

# Read the source CSV into a Spark DataFrame

file_location = "/FileStore/tables/Building_Permits.csv"
file_type = "csv"

# Reader settings used for the CSV file
infer_schema = "true"
first_row_is_header = "true"
delimiter = ","

# These options target CSV input; for other file types they will be ignored.
df = (
    spark.read.format(file_type)
    .option("inferSchema", infer_schema)
    .option("header", first_row_is_header)
    .option("sep", delimiter)
    .load(file_location)
)

# Build an in-code DataContext: one Spark-backed datasource plus filesystem store defaults

# Settings for the runtime data connector; each batch must supply the listed identifier key
runtime_connector_settings = {
    "module_name": "great_expectations.datasource.data_connector",
    "class_name": "RuntimeDataConnector",
    "batch_identifiers": ["some_batch_identifier_so_this_can_work"],
}

my_spark_datasource_config = DatasourceConfig(
    class_name="Datasource",
    execution_engine={"class_name": "SparkDFExecutionEngine"},
    data_connectors={"my_runtime_data_connector_name": runtime_connector_settings},
)

# Store backends default to the filesystem, rooted at this directory
filesystem_store_defaults = FilesystemStoreBackendDefaults(root_directory="/dbfs/FileStore/")

project_config = DataContextConfig(
    datasources={"my_datasource_name": my_spark_datasource_config},
    store_backend_defaults=filesystem_store_defaults,
)

# The context is constructed purely from the config object above
context = BaseDataContext(project_config=project_config)

# Describe the in-memory batch to validate: the DataFrame is passed as runtime batch data
# and labelled with the identifier key declared on the runtime data connector
batch_request = RuntimeBatchRequest(
    datasource_name="my_datasource_name",
    data_connector_name="my_runtime_data_connector_name",
    data_asset_name="my_data_asset_name",
    runtime_parameters={"batch_data": df},
    batch_identifiers={"some_batch_identifier_so_this_can_work": "blah"},
)

# Obtain the Expectation Suite: attempt to create it, and load it by name if that fails
# NOTE: normally you would do one or the other; the try/except is only a convenience
from great_expectations.exceptions import DataContextError

suite_name = "my_suite_name1"
try:
    suite = context.create_expectation_suite(suite_name)
except DataContextError:
    suite = context.get_expectation_suite(suite_name)

# Pair the batch with the suite to obtain a Validator
my_validator = context.get_validator(batch_request=batch_request, expectation_suite=suite)

# Register the expectations on the validator
my_validator.expect_table_row_count_to_equal(140)
my_validator.expect_column_values_to_not_be_null("Permit Number")
my_validator.expect_column_to_exist("Road Number")

# Write the suite to the Expectation Store; failed expectations are explicitly not discarded
my_validator.save_expectation_suite(discard_failed_expectations=False)

In [0]:
# Fetch the suite named "my_suite_name1" from the context and display it as the cell output
# (the same name the previous cell creates/loads and saves).
context.get_expectation_suite("my_suite_name1")

In [0]:
from great_expectations.core.batch import RuntimeBatchRequest
from great_expectations.data_context import BaseDataContext

# Re-read the source CSV so this cell validates freshly loaded data.
file_location = "/FileStore/tables/Building_Permits.csv"
file_type = "csv"

# CSV options
infer_schema = "true"
first_row_is_header = "true"
delimiter = ","

# The applied options are for CSV files. For other file types, these will be ignored.
df = (
    spark.read.format(file_type)
    .option("inferSchema", infer_schema)
    .option("header", first_row_is_header)
    .option("sep", delimiter)
    .load(file_location)
)

# NOTE: project_config is the DataContextConfig built in the cell above. The previous code
# passed an undefined name (`data_context_config`), which raised NameError.
context = BaseDataContext(project_config=project_config)

# Load the suite that the earlier cell saved. The previous code first fetched
# "my_suite_name" (never created in this notebook) into an unused variable.
suite = context.get_expectation_suite("my_suite_name1")

# Request the batch through the datasource / runtime connector that project_config actually
# declares. The previous code referenced a datasource named "my_spark_datasource", which is
# not configured, and used the dict-style get_batch call rather than a RuntimeBatchRequest.
validation_batch_request = RuntimeBatchRequest(
    datasource_name="my_datasource_name",
    data_connector_name="my_runtime_data_connector_name",
    data_asset_name="my_data_asset_name",
    runtime_parameters={"batch_data": df},
    batch_identifiers={"some_batch_identifier_so_this_can_work": "blah"},
)

# `my_batch` keeps its original name so later cells that reference it still resolve;
# it is now the Validator bound to the reloaded DataFrame and the saved suite.
my_batch = context.get_validator(
    batch_request=validation_batch_request,
    expectation_suite=suite,
)

# Run every expectation in the suite against the batch; the result is the cell output.
my_batch.validate()
