In [None]:
!pip install great_expectations

In [1]:
# Source tutorial: https://docs.greatexpectations.io/docs/core/introduction/try_gx
# Version used for this demo: 1.0.0

# Prerequisites: Python version 3.8 to 3.11

# pip install pandas
# pip install great_expectations


# 1- Import the following libraries.
import great_expectations as gx
# pandas is imported under the conventional alias `pd`.
import pandas as pd

# To check the installed GX Core version, run: print(gx.__version__)



# 3- A Data Context object serves as the entrypoint for interacting with GX components.
context = gx.get_context()


In [2]:

# 2- Download and read the sample data into a Pandas DataFrame.
df = pd.read_csv(
    "https://raw.githubusercontent.com/great-expectations/gx_tutorials/main/data/yellow_tripdata_sample_2019-01.csv"
)

# 4- Connect to data and create a Batch.
# Define a Data Source, Data Asset, Batch Definition, and Batch.
# The Pandas DataFrame is provided to the Batch Definition at runtime to create the Batch.

# add_or_update_pandas (rather than add_pandas) keeps this cell re-runnable:
# add_pandas raises once a Data Source named "pandas" is already registered in
# the context, which happens whenever the cell is executed a second time on
# the same kernel.
data_source = context.data_sources.add_or_update_pandas(name="pandas")
data_asset = data_source.add_dataframe_asset(name="pd dataframe asset")

batch_definition = data_asset.add_batch_definition_whole_dataframe("batch definition")
batch = batch_definition.get_batch(batch_parameters={"dataframe": df})


In [None]:
# Create an Expectation.
# An Expectation is a core GX building block: it states explicitly the
# condition the data is required to satisfy.
# The one defined here requires every value in the passenger_count column
# to lie in the range 2 to 6.
expectation = gx.expectations.ExpectColumnValuesToBeBetween(
    column="passenger_count",
    min_value=2,
    max_value=6,
)



In [5]:

# Validate the Batch against the Expectation and print the validation result.
# Requires `batch` and `expectation` from the earlier cells to be defined.
validation_result = batch.validate(expectation)
print(validation_result)


Calculating Metrics:   0%|          | 0/10 [00:00<?, ?it/s]

{
  "success": false,
  "expectation_config": {
    "type": "expect_column_values_to_be_between",
    "kwargs": {
      "batch_id": "pandas-pd dataframe asset",
      "column": "passenger_count",
      "min_value": 2.0,
      "max_value": 6.0
    },
    "meta": {}
  },
  "result": {
    "element_count": 10000,
    "unexpected_count": 7299,
    "unexpected_percent": 72.99,
    "partial_unexpected_list": [
      1,
      1,
      1,
      1,
      1,
      1,
      1,
      1,
      1,
      1,
      1,
      1,
      1,
      1,
      1,
      1,
      1,
      1,
      1,
      1
    ],
    "missing_count": 0,
    "missing_percent": 0.0,
    "unexpected_percent_total": 72.99,
    "unexpected_percent_nonmissing": 72.99,
    "partial_unexpected_counts": [
      {
        "value": 1,
        "count": 20
      }
    ],
    "partial_unexpected_index_list": [
      0,
      1,
      2,
      3,
      4,
      5,
      6,
      7,
      8,
      9,
      10,
      11,
      12,
      13,
    