In [1]:
import great_expectations as gx
from great_expectations.cli.datasource import sanitize_yaml_and_save_datasource, check_if_datasource_name_exists
# Obtain the Great Expectations data context; every later cell goes through it
# (datasources, expectation suite, validator, checkpoint, data docs).
context = gx.get_context()

In [2]:
from path_config import folder_path
import os

# Build the CSV path from the folder configured in path_config.
file_name = "synthetic_ecommerce_employee_performance_data.csv"
file_path = os.path.join(folder_path, file_name)

# Read the CSV through the context's default pandas datasource.
# NOTE(review): `validator` is rebound by context.get_validator(...) in a later
# cell before anything uses this one — confirm whether this read is still needed.
default_pandas_source = context.sources.pandas_default
validator = default_pandas_source.read_csv(file_path)


In [3]:
# Register the pandas Datasource under a fixed name.
# add_or_update_pandas (instead of add_pandas) keeps this cell re-runnable:
# add_pandas raises when a datasource with this name already exists in the
# context, e.g. on a second execution of this cell.
datasource_name = 'ecommerce_datasource'
datasource = context.sources.add_or_update_pandas(name=datasource_name)

# Attach the CSV file as a named asset of that datasource.
asset_name = 'ecommerce_asset'
asset = datasource.add_csv_asset(asset_name, filepath_or_buffer=file_path)

# Build the batch request that later cells hand to the validator.
batch_request = asset.build_batch_request()

In [4]:
# Look the datasource and asset up by name from the context, then rebuild the
# batch request from the retrieved asset.
ecommerce_datasource = context.get_datasource('ecommerce_datasource')
data_asset = ecommerce_datasource.get_asset('ecommerce_asset')
batch_request = data_asset.build_batch_request()

In [5]:
# Create (or replace) the suite that will hold the expectations defined below.
# Left as the last expression so the resulting suite is displayed.
context.add_or_update_expectation_suite(
    expectation_suite_name="ecommerce_expectation_suite",
)

{
  "expectation_suite_name": "ecommerce_expectation_suite",
  "ge_cloud_id": null,
  "expectations": [],
  "data_asset_type": null,
  "meta": {
    "great_expectations_version": "0.18.19"
  }
}

In [6]:
# Bind the batch request to the expectation suite; this validator is the one
# used for the expectations, the save, and the checkpoint in later cells.
suite_name = "ecommerce_expectation_suite"
validator = context.get_validator(batch_request=batch_request, expectation_suite_name=suite_name)

# Preview the first rows of the batch.
validator.head()

Calculating Metrics:   0%|          | 0/1 [00:00<?, ?it/s]

Unnamed: 0,EmployeeID,TaskDate,TaskTime,TaskType,Duration,OrdersProcessed,ComplaintsHandled,Feedback,ResponseTime
0,9318b353-2ce8-4324-be78-19b7e334aaeb,2024-04-09,09:46:35,customer_service,117,56,8,positive,29
1,0e6ef032-edf6-4beb-8e6a-2e288c66e840,2024-03-01,19:31:54,inventory_management,60,22,10,neutral,26
2,479a4e6f-7d92-4bce-9bda-0dc762802cf6,2024-02-26,19:02:19,inventory_management,151,13,1,positive,30
3,7f966d4b-dd3a-47f4-9401-ad2f18137125,2024-05-09,20:18:05,customer_service,17,15,8,neutral,8
4,b3900fa8-68d9-497d-ab20-09568282fa1f,2024-03-24,23:08:26,order_processing,117,59,6,negative,37


In [7]:
# Define expectations

# EmployeeID and TaskDate must never be null.
for required_column in ("EmployeeID", "TaskDate"):
    validator.expect_column_values_to_not_be_null(column=required_column)

# TaskType may only take one of the values listed here.
allowed_task_types = ['customer_service', 'order_processing', 'inventory_management']
validator.expect_column_values_to_be_in_set(column="TaskType", value_set=allowed_task_types)

# The median of Duration must fall between 10 and 180.
# Kept as the final expression so its validation result is displayed.
validator.expect_column_median_to_be_between(column="Duration", min_value=10, max_value=180)

Calculating Metrics:   0%|          | 0/6 [00:00<?, ?it/s]

Calculating Metrics:   0%|          | 0/6 [00:00<?, ?it/s]

Calculating Metrics:   0%|          | 0/8 [00:00<?, ?it/s]

Calculating Metrics:   0%|          | 0/4 [00:00<?, ?it/s]

{
  "success": true,
  "result": {
    "observed_value": 95.0
  },
  "meta": {},
  "exception_info": {
    "raised_exception": false,
    "exception_traceback": null,
    "exception_message": null
  }
}

In [8]:
# Save the validator's expectation suite; discard_failed_expectations=False
# keeps expectations in the suite even if they failed against this batch.
validator.save_expectation_suite(discard_failed_expectations=False)

In [9]:
# Create (or replace) the checkpoint named "my_quickstart_checkpoint" from the
# current validator.
checkpoint = context.add_or_update_checkpoint(name="my_quickstart_checkpoint", validator=validator)

In [10]:
# Run the checkpoint and keep its result object.
# NOTE(review): the result is not inspected anywhere in the visible cells —
# consider checking its outcome before relying on the run.
checkpoint_result = checkpoint.run()

Calculating Metrics:   0%|          | 0/21 [00:00<?, ?it/s]

In [11]:
# Plain-text preview of the first rows of the validated batch.
head_preview = validator.head()
print(head_preview)

Calculating Metrics:   0%|          | 0/1 [00:00<?, ?it/s]

                             EmployeeID    TaskDate  TaskTime  \
0  9318b353-2ce8-4324-be78-19b7e334aaeb  2024-04-09  09:46:35   
1  0e6ef032-edf6-4beb-8e6a-2e288c66e840  2024-03-01  19:31:54   
2  479a4e6f-7d92-4bce-9bda-0dc762802cf6  2024-02-26  19:02:19   
3  7f966d4b-dd3a-47f4-9401-ad2f18137125  2024-05-09  20:18:05   
4  b3900fa8-68d9-497d-ab20-09568282fa1f  2024-03-24  23:08:26   

               TaskType  Duration  OrdersProcessed  ComplaintsHandled  \
0      customer_service       117               56                  8   
1  inventory_management        60               22                 10   
2  inventory_management       151               13                  1   
3      customer_service        17               15                  8   
4      order_processing       117               59                  6   

   Feedback  ResponseTime  
0  positive            29  
1   neutral            26  
2  positive            30  
3   neutral             8  
4  negative            37  


In [12]:
# Final step: open the context's Data Docs.
context.open_data_docs()