In [1]:
import great_expectations as ge
from great_expectations import DataContext
from great_expectations.data_context.types.base import DataContextConfig
from great_expectations.data_context import BaseDataContext
import great_expectations.exceptions as ge_exceptions

from great_expectations.dataset import SparkDFDataset


import json
import datetime

In [None]:
# Reader settings. Every option value is handed to Spark as a string.
delimiter = ","
first_row_is_header = "false"
infer_schema = "false"

# Which file to read, and the format name given to the Spark reader.
file_type = "csv"
file_location = "dc_wikia_data.csv"

# Read the file into a Spark DataFrame. The options below are CSV options; for
# other file types they will be ignored.
df = (
    spark.read.format(file_type)
    .option("inferSchema", infer_schema)
    .option("header", first_row_is_header)
    .option("sep", delimiter)
    .load(file_location)
)

# Render the loaded frame in the notebook output.
display(df)

In [None]:
# Assemble the Great Expectations project configuration in code: one Spark
# datasource, filesystem-backed expectation/validation stores, a Data Docs site,
# and a single validation operator that stores results and refreshes the docs.
# NOTE(review): all base directories live under /FileStore/testing/ — if this runs
# on Databricks, confirm whether these should be the /dbfs/... mount paths.


def _fs_backend(base_directory):
    """Return a TupleFilesystemStoreBackend config rooted at ``base_directory``."""
    return {
        "class_name": "TupleFilesystemStoreBackend",
        "base_directory": base_directory,
    }


# Datasource that hands Spark DataFrames to Great Expectations as SparkDFDataset.
spark_datasource_config = {
    "data_asset_type": {
        "class_name": "SparkDFDataset",
        "module_name": "great_expectations.dataset",
    },
    "class_name": "SparkDFDatasource",
    "module_name": "great_expectations.datasource",
    "batch_kwargs_generators": {},
}

# Stores for expectation suites, validation results and evaluation parameters.
store_configs = {
    "expectations_store": {
        "class_name": "ExpectationsStore",
        "store_backend": _fs_backend("/FileStore/testing/expectations/"),
    },
    "validations_store": {
        "class_name": "ValidationsStore",
        "store_backend": _fs_backend("/FileStore/testing/validations/"),
    },
    "evaluation_parameter_store": {"class_name": "EvaluationParameterStore"},
}

# Single Data Docs site written to the filesystem.
docs_site_configs = {
    "local_site": {
        "class_name": "SiteBuilder",
        "store_backend": _fs_backend("/FileStore/testing/data_docs/"),
        "site_index_builder": {
            "class_name": "DefaultSiteIndexBuilder",
            "show_cta_footer": True,
        },
    }
}

# Actions run, in this order, by the "action_list_operator" validation operator.
validation_actions = [
    {"name": step_name, "action": {"class_name": action_class}}
    for step_name, action_class in (
        ("store_validation_result", "StoreValidationResultAction"),
        ("store_evaluation_params", "StoreEvaluationParametersAction"),
        ("update_data_docs", "UpdateDataDocsAction"),
    )
]

project_config = DataContextConfig(
    config_version=2,
    plugins_directory=None,
    config_variables_file_path=None,
    datasources={"my_local_datasource": spark_datasource_config},
    stores=store_configs,
    expectations_store_name="expectations_store",
    validations_store_name="validations_store",
    evaluation_parameter_store_name="evaluation_parameter_store",
    data_docs_sites=docs_site_configs,
    validation_operators={
        "action_list_operator": {
            "class_name": "ActionListValidationOperator",
            "action_list": validation_actions,
        }
    },
    anonymous_usage_statistics={"enabled": True},
)

In [None]:
# Instantiate the data context directly from the in-memory `project_config`.
context = BaseDataContext(
    project_config=project_config,
)


In [None]:
# Create the suite that the expectations below are recorded against.
# overwrite_existing=True keeps this cell re-runnable: the expectations store is
# filesystem-backed, so after the first run "my_new_suite" already exists and a
# plain create_expectation_suite() call would raise instead of returning a suite.
context.create_expectation_suite("my_new_suite", overwrite_existing=True)


In [None]:
# Describe the batch: the in-memory Spark DataFrame, served through the
# "my_local_datasource" datasource declared in the project config.
batch_kwargs = {"dataset": df, "datasource": "my_local_datasource"}

# Fetch the batch bound to the "my_new_suite" expectation suite.
my_batch = context.get_batch(batch_kwargs, "my_new_suite")

In [None]:
# Expect exactly the 13 column names "_c0" through "_c12", in that order.
expected_columns = [f"_c{position}" for position in range(13)]
my_batch.expect_table_columns_to_match_ordered_list(expected_columns)


In [None]:
# Write the expectations recorded on `my_batch` to a JSON file.
# This must be called on `my_batch`, the object the expectation above was run
# against; `GE_spark_df` is not defined anywhere in this notebook and raises
# NameError on a fresh kernel.
my_batch.save_expectation_suite(filepath="/FileStore/testing/expectations/test.json")