# Hero Experience
- This notebook outlines the steps for retrieving a single `Batch` of data and running the `RuleBasedProfiler` on it.  

In [None]:
import great_expectations as ge
from great_expectations.core.batch import BatchRequest
from great_expectations.core import ExpectationSuite

from great_expectations.rule_based_profiler.config.base import RuleBasedProfilerConfig
from great_expectations.rule_based_profiler.rule_based_profiler import BaseRuleBasedProfiler

from great_expectations.core.yaml_handler import YAMLHandler
from great_expectations.checkpoint.checkpoint import SimpleCheckpoint

In [None]:
# Obtain the DataContext; the remaining cells all work through this object.
data_context: ge.DataContext = ge.get_context()

In [None]:
# Load the first 3 months of 2019 Taxi data into our Datasource.
data_path: str = "../../../../test_sets/taxi_yellow_tripdata_samples/first_3_files"

# Pandas-backed Datasource with one filesystem data connector ("monthly") that
# exposes a single asset ("my_reports"). The asset's regex has three capture
# groups, one for each entry in "group_names".
datasource_config: dict = {
    "name": "taxi_pandas",
    "class_name": "Datasource",
    "module_name": "great_expectations.datasource",
    "execution_engine": {
        "module_name": "great_expectations.execution_engine",
        "class_name": "PandasExecutionEngine",
    },
    "data_connectors": {
        "monthly": {
            "class_name": "ConfiguredAssetFilesystemDataConnector",
            "base_directory": data_path,
            "assets": {
                "my_reports": {
                    "base_directory": "./",
                    "group_names": ["name", "year", "month"],
                    "pattern": "^(.+)_(\\d.*)-(\\d.*)\\.csv",
                    "module_name": "great_expectations.datasource.data_connector.asset",
                    "class_name": "Asset",
                },
            },
        },
    },
}

# Create the YAML handler here so that this cell works on a fresh
# top-to-bottom run; without it `yaml_handler` is not yet bound at this point
# and the call below fails with a NameError.
yaml_handler = YAMLHandler()

# Validate the configuration (serialized to YAML) before registering it.
data_context.test_yaml_config(yaml_handler.dump(datasource_config))

In [None]:
# Register the Datasource only if the DataContext cannot already look it up:
# a ValueError from the lookup is treated as "not configured yet".
datasource_name = datasource_config["name"]
try:
    data_context.get_datasource(datasource_name)
except ValueError:
    data_context.add_datasource(**datasource_config)

## Build BatchRequest and Retrieve Batch
In this example, we will be using a `BatchRequest` that returns a single `Batch`: the most recent 2019 taxi data file in our `my_reports` asset, selected with index `-1`.

In [None]:
# BatchRequest against the "monthly" connector of the "taxi_pandas" Datasource.
# The "-1" index is meant to narrow the request down to the last Batch of the
# "my_reports" asset.
hero_batch: BatchRequest = BatchRequest(
    datasource_name="taxi_pandas",
    data_connector_name="monthly",
    data_asset_name="my_reports",
    data_connector_query={"index": "-1"},
)

In [None]:
# Create a throwaway suite (overwriting any existing "temp_suite") so that a
# Validator can be built for the hero Batch.
expectation_suite = data_context.create_expectation_suite(
    expectation_suite_name="temp_suite",
    overwrite_existing=True,
)
validator = data_context.get_validator(
    batch_request=hero_batch, expectation_suite=expectation_suite
)

# Preview five rows of the Batch as the cell output.
validator.head(n_rows=5, fetch_all=False)

In [None]:
# Column labels taken from the head of the Validator's active Batch; kept
# around for later use when displaying our Checkpoint results.
columns = validator.active_batch.head().columns

# Load Profiler


In [None]:
# this part will be taken care of by the Data Assistants

In [None]:
# Relative path of the YAML file that holds the profiler configuration.
configuration_path = "../bobby_user_workflow_verbose_profiler_config.yml"

In [None]:
# Read the profiler configuration file as text, then parse the YAML once the
# file has been closed.
with open(configuration_path) as config_file:
    yaml_handler = YAMLHandler()
    read = config_file.read()
yaml_config = yaml_handler.load(read)

In [None]:
# Build the typed profiler configuration; the parsed YAML mapping is expanded
# into keyword arguments.
profiler_configuration: RuleBasedProfilerConfig = RuleBasedProfilerConfig(**yaml_config)

In [None]:
# Instantiate the profiler from the RuleBasedProfilerConfig built from the
# YAML file, bound to our DataContext. The configuration object is
# `profiler_configuration`; the previously passed name `rbp` is not bound
# in any earlier cell, so the call raised a NameError.
profiler: BaseRuleBasedProfiler = BaseRuleBasedProfiler(
    profiler_configuration,
    data_context=data_context,
)

In [None]:
# Run the profiler on the hero Batch and bind the resulting suite to
# `expectation_suite`.
# NOTE(review): clarify what role `batch_request` plays for the profiler here.
expectation_suite: ExpectationSuite = profiler.run(
    expectation_suite_name="NewExpectationSuite",
    batch_request=hero_batch,
    include_citation=False,
)

In [None]:
# TODO : output of which Expectations are being included?

In [None]:
# Save the current `expectation_suite` through the DataContext so that it can
# be referenced by name afterwards.
data_context.save_expectation_suite(expectation_suite)

# Use Profiled ExpectationSuite to run Checkpoint

In [None]:
# Checkpoint with a single validation: the hero Batch checked against the
# "NewExpectationSuite" suite.
checkpoint: SimpleCheckpoint = SimpleCheckpoint(
    name="hero_checkpoint",
    data_context=data_context,
    validations=[
        {
            "batch_request": hero_batch,
            "expectation_suite_name": "NewExpectationSuite",
        },
    ],
)

In [None]:
# Run the Checkpoint and keep what it returns for inspection.
results = checkpoint.run()

In [None]:
# Build the project's Data Docs through the DataContext.
data_context.build_data_docs()