In [1]:
import pytest

import great_expectations as gx
from great_expectations.core.batch import BatchRequest
from great_expectations.core.yaml_handler import YAMLHandler
from great_expectations.datasource.fluent.interfaces import DataAsset
import pathlib
import os

In [2]:
# Shared notebook state: the Great Expectations data context, the base path
# (an empty Path, i.e. relative to the current working directory) from which
# later cells build the data directory, and a YAML handler instance.
context = gx.get_context()
current_path = pathlib.Path(".")
yaml = YAMLHandler()

In [3]:
# --- 1. Datasource and asset ------------------------------------------------
# The sample CSVs are addressed relative to `current_path`, two directory
# levels up and then down into the taxi test-set folder.
data_path = (
    current_path
    / ".."
    / ".."
    / "test_sets"
    / "taxi_yellow_tripdata_samples"
    / "first_ten_trips_in_each_file"
)

# Register the filesystem datasource and keep the asset handed back by
# `add_csv_asset`, rather than discarding it and looking it up again by name.
# The regex captures `year` and `month` as named groups from each file name.
all_years_asset: DataAsset = context.sources.add_pandas_filesystem(
    "taxi_multi_batch_datasource",
    base_directory=data_path,  # replace with your data directory
).add_csv_asset(
    "all_years",
    batching_regex=r"yellow_tripdata_sample_(?P<year>\d{4})-(?P<month>\d{2})\.csv",
)

# --- 2. Batch request -------------------------------------------------------
# No options are passed, so the request is not narrowed to a particular
# year/month (presumably it then spans every matching file — confirm).
# NOTE(review): a fluent asset may return the fluent BatchRequest type rather
# than `great_expectations.core.batch.BatchRequest`; confirm the annotation.
multi_batch_all_years_batch_request: BatchRequest = (
    all_years_asset.build_batch_request()
)

# --- 3. Run the missingness data assistant ----------------------------------
expectation_suite_name = "my_missingness_assistant_suite"

# Columns the assistant is told to skip.
exclude_column_names = [
    "VendorID",
    "pickup_datetime",
    "dropoff_datetime",
    "RatecodeID",
    "PULocationID",
    "DOLocationID",
    "payment_type",
    "fare_amount",
    "extra",
    "mta_tax",
    "tip_amount",
    "tolls_amount",
    "improvement_surcharge",
    "congestion_surcharge",
]
data_assistant_result = context.assistants.missingness.run(
    batch_request=multi_batch_all_years_batch_request,
    exclude_column_names=exclude_column_names,
)

# --- 4. Persist the generated suite -----------------------------------------
# The suite is built from the assistant result and saved in a single step; no
# empty placeholder suite is created beforehand, because it would be replaced
# here anyway.
expectation_suite = data_assistant_result.get_expectation_suite(
    expectation_suite_name=expectation_suite_name
)
context.add_or_update_expectation_suite(expectation_suite=expectation_suite)

# --- 5. Validate with a checkpoint ------------------------------------------
# The checkpoint pairs the same batch request with the suite saved above.
checkpoint = context.add_or_update_checkpoint(
    name=f"yellow_tripdata_sample_{expectation_suite_name}",
    validations=[
        {
            "batch_request": multi_batch_all_years_batch_request,
            "expectation_suite_name": expectation_suite_name,
        }
    ],
)
checkpoint_result = checkpoint.run()
print(checkpoint_result)

# --- 6. Plot ----------------------------------------------------------------
# Binding the return value to `res` keeps the notebook from echoing it as the
# cell's output value.
res = data_assistant_result.plot_expectations_and_metrics()




Generating Expectations:   0%|          | 0/1 [00:00<?, ?it/s]

Calculating Metrics:   0%|          | 0/2 [00:00<?, ?it/s]

Calculating Metrics:   0%|          | 0/1 [00:00<?, ?it/s]

Profiling Dataset:         0%|          | 0/8 [00:00<?, ?it/s]

Calculating Metrics:   0%|          | 0/180 [00:00<?, ?it/s]

Calculating Metrics:   0%|          | 0/36 [00:00<?, ?it/s]

Calculating Metrics:   0%|          | 0/180 [00:00<?, ?it/s]

Calculating Metrics:   0%|          | 0/180 [00:00<?, ?it/s]

Calculating Metrics:   0%|          | 0/36 [00:00<?, ?it/s]

Calculating Metrics:   0%|          | 0/180 [00:00<?, ?it/s]

Calculating Metrics:   0%|          | 0/180 [00:00<?, ?it/s]

Calculating Metrics:   0%|          | 0/36 [00:00<?, ?it/s]

Calculating Metrics:   0%|          | 0/180 [00:00<?, ?it/s]

Calculating Metrics:   0%|          | 0/180 [00:00<?, ?it/s]

Calculating Metrics:   0%|          | 0/36 [00:00<?, ?it/s]

Calculating Metrics:   0%|          | 0/180 [00:00<?, ?it/s]

Calculating Metrics:   0%|          | 0/180 [00:00<?, ?it/s]

Calculating Metrics:   0%|          | 0/36 [00:00<?, ?it/s]

Calculating Metrics:   0%|          | 0/180 [00:00<?, ?it/s]

Calculating Metrics:   0%|          | 0/180 [00:00<?, ?it/s]

Calculating Metrics:   0%|          | 0/36 [00:00<?, ?it/s]

Calculating Metrics:   0%|          | 0/180 [00:00<?, ?it/s]

Calculating Metrics:   0%|          | 0/180 [00:00<?, ?it/s]

Calculating Metrics:   0%|          | 0/36 [00:00<?, ?it/s]

Calculating Metrics:   0%|          | 0/180 [00:00<?, ?it/s]

Calculating Metrics:   0%|          | 0/180 [00:00<?, ?it/s]

Calculating Metrics:   0%|          | 0/36 [00:00<?, ?it/s]

Calculating Metrics:   0%|          | 0/180 [00:00<?, ?it/s]

Calculating Metrics:   0%|          | 0/23 [00:00<?, ?it/s]

{
  "run_id": {
    "run_time": "2023-09-11T18:57:53.466601-07:00",
    "run_name": null
  },
  "run_results": {
    "ValidationResultIdentifier::my_missingness_assistant_suite/__none__/20230912T015753.466601Z/taxi_multi_batch_datasource-all_years-year_2020-month_12": {
      "validation_result": {
        "evaluation_parameters": {},
        "statistics": {
          "evaluated_expectations": 4,
          "successful_expectations": 4,
          "unsuccessful_expectations": 0,
          "success_percent": 100.0
        },
        "results": [
          {
            "result": {
              "element_count": 10,
              "unexpected_count": 0,
              "unexpected_percent": 0.0,
              "partial_unexpected_list": [],
              "partial_unexpected_index_list": [],
              "partial_unexpected_counts": []
            },
            "expectation_config": {
              "expectation_type": "expect_column_values_to_not_be_null",
              "kwargs": {
          

In [4]:
# Bare final expression: the notebook displays whatever this call returns
# (presumably the assistant's expectation and metric plots — confirm against
# the Great Expectations documentation).
data_assistant_result.plot_expectations_and_metrics()

hello


