In [1]:
import great_expectations as gx

from great_expectations.checkpoint import Checkpoint

In [2]:
# Obtain the Great Expectations data context; every later cell (datasource, suite,
# validator, checkpoint, data docs) goes through this object.
context = gx.get_context()

In [3]:
# Keep the database password out of the notebook. Great Expectations resolves the
# ${MY_DB_PW} placeholder from an environment variable (or config_variables.yml),
# so set MY_DB_PW before starting the kernel, e.g. `export MY_DB_PW=...`.
# NOTE(review): a literal password used to be committed on this line — rotate it.
PG_CONNECTION_STRING = "postgresql+psycopg2://postgres:${MY_DB_PW}@localhost:5432/postgres"

In [4]:
# Register the SQL datasource under the name "pg_datasource".
# add_or_update_sql keeps this cell re-runnable: a plain add_sql raises once a
# datasource with this name is already registered in the context. It also matches
# the add_or_update_* calls this notebook uses for the suite and the checkpoint.
pg_datasource = context.sources.add_or_update_sql(
    name="pg_datasource", connection_string=PG_CONNECTION_STRING
)

In [5]:
# Register the "car_assignment" table as a table asset on the datasource. The call
# stays the cell's last expression so the returned asset's repr is displayed.
pg_datasource.add_table_asset(name="car_assignment", table_name="car_assignment")

TableAsset(name='car_assignment', type='table', id=None, order_by=[], batch_metadata={}, splitter=None, table_name='car_assignment', schema_name=None)

In [6]:
# Look up the "car_assignment" asset by name and build a batch request from it;
# the validator and the checkpoint both consume this request.
car_assignment_asset = pg_datasource.get_asset("car_assignment")
batch_request = car_assignment_asset.build_batch_request()

In [7]:
expectation_suite_name = "hacktiv8_car_assignment"

# Create the suite (or update an existing one of the same name), then obtain a
# validator that ties the batch request to that suite.
context.add_or_update_expectation_suite(expectation_suite_name=expectation_suite_name)
validator = context.get_validator(
    batch_request=batch_request,
    expectation_suite_name=expectation_suite_name,
)

# Preview the first rows. Leaving head() as the last expression lets the notebook
# use its rich table display instead of the wrapped, truncated text print() emits.
validator.head()

Calculating Metrics: 100%|██████████| 1/1 [00:00<00:00, 58.79it/s]

   car_id  symboling                   carname fueltype aspiration doornumber  \
0       1          3        alfa-romero giulia      gas        std        two   
1       2          3       alfa-romero stelvio      gas        std        two   
2       3          1  alfa-romero Quadrifoglio      gas        std        two   
3       4          2               audi 100 ls      gas        std       four   
4       5          2                audi 100ls      gas        std       four   

       carbody drivewheel enginelocation  wheelbase  ...  enginesize  \
0  convertible        rwd          front       88.6  ...         130   
1  convertible        rwd          front       88.6  ...         130   
2    hatchback        rwd          front       94.5  ...         152   
3        sedan        fwd          front       99.8  ...         109   
4        sedan        4wd          front       99.4  ...         136   

   fuelsystem  boreratio  stroke compressionratio horsepower  peakrpm citympg  \




In [8]:
# Expect every row to carry a car_id (no NULLs in that column).
validator.expect_column_values_to_not_be_null(column="car_id")

Calculating Metrics: 100%|██████████| 8/8 [00:00<00:00, 129.04it/s]


{
  "result": {
    "element_count": 205,
    "unexpected_count": 0,
    "unexpected_percent": 0.0,
    "partial_unexpected_list": []
  },
  "success": true,
  "exception_info": {
    "raised_exception": false,
    "exception_traceback": null,
    "exception_message": null
  },
  "meta": {}
}

In [9]:
# Expect car_id values to lie between 0 and 300.
validator.expect_column_values_to_be_between(
    column="car_id",
    min_value=0,
    max_value=300,
)

Calculating Metrics: 100%|██████████| 11/11 [00:00<00:00, 177.41it/s]


{
  "result": {
    "element_count": 205,
    "unexpected_count": 0,
    "unexpected_percent": 0.0,
    "partial_unexpected_list": [],
    "missing_count": 0,
    "missing_percent": 0.0,
    "unexpected_percent_total": 0.0,
    "unexpected_percent_nonmissing": 0.0
  },
  "success": true,
  "exception_info": {
    "raised_exception": false,
    "exception_traceback": null,
    "exception_message": null
  },
  "meta": {}
}

In [10]:
# Schema check: the table must contain a "symboling" column.
validator.expect_column_to_exist(column="symboling")

Calculating Metrics: 100%|██████████| 2/2 [00:00<00:00, 222.37it/s]


{
  "result": {},
  "success": true,
  "exception_info": {
    "raised_exception": false,
    "exception_traceback": null,
    "exception_message": null
  },
  "meta": {}
}

In [11]:
# Expect no two rows to share the same car_id.
validator.expect_column_values_to_be_unique(column="car_id")

Calculating Metrics: 100%|██████████| 10/10 [00:00<00:00, 172.42it/s]


{
  "result": {
    "element_count": 205,
    "unexpected_count": 0,
    "unexpected_percent": 0.0,
    "partial_unexpected_list": [],
    "missing_count": 0,
    "missing_percent": 0.0,
    "unexpected_percent_total": 0.0,
    "unexpected_percent_nonmissing": 0.0
  },
  "success": true,
  "exception_info": {
    "raised_exception": false,
    "exception_traceback": null,
    "exception_message": null
  },
  "meta": {}
}

In [12]:
# Expect the horsepower column's type to be one of the listed types (INTEGER only).
validator.expect_column_values_to_be_in_type_list(
    column="horsepower",
    type_list=["INTEGER"],
)

Calculating Metrics: 100%|██████████| 1/1 [00:00<00:00, 125.09it/s]


{
  "result": {
    "observed_value": "INTEGER"
  },
  "success": true,
  "exception_info": {
    "raised_exception": false,
    "exception_traceback": null,
    "exception_message": null
  },
  "meta": {}
}

In [13]:
# Expect enginelocation to take only the values "front" or "rear".
validator.expect_column_values_to_be_in_set(
    column="enginelocation",
    value_set=["front", "rear"],
)

Calculating Metrics: 100%|██████████| 11/11 [00:00<00:00, 171.87it/s]


{
  "result": {
    "element_count": 205,
    "unexpected_count": 0,
    "unexpected_percent": 0.0,
    "partial_unexpected_list": [],
    "missing_count": 0,
    "missing_percent": 0.0,
    "unexpected_percent_total": 0.0,
    "unexpected_percent_nonmissing": 0.0
  },
  "success": true,
  "exception_info": {
    "raised_exception": false,
    "exception_traceback": null,
    "exception_message": null
  },
  "meta": {}
}

In [14]:
# Expect the largest peakrpm value in the column to fall between 1 and 7000.
validator.expect_column_max_to_be_between(
    column="peakrpm",
    min_value=1,
    max_value=7000,
)

Calculating Metrics: 100%|██████████| 6/6 [00:00<00:00, 222.09it/s]

Calculating Metrics: 100%|██████████| 6/6 [00:00<00:00, 181.81it/s]


{
  "result": {
    "observed_value": 6600
  },
  "success": true,
  "exception_info": {
    "raised_exception": false,
    "exception_traceback": null,
    "exception_message": null
  },
  "meta": {}
}

In [15]:
# Expect the average price to fall between 100 and 14000.
validator.expect_column_mean_to_be_between(
    column="price",
    min_value=100,
    max_value=14000,
)

Calculating Metrics:  67%|██████▋   | 4/6 [00:00<00:00, 400.06it/s] 

Calculating Metrics: 100%|██████████| 6/6 [00:00<00:00, 200.03it/s]


{
  "result": {
    "observed_value": 13276.710575457317
  },
  "success": true,
  "exception_info": {
    "raised_exception": false,
    "exception_traceback": null,
    "exception_message": null
  },
  "meta": {}
}

In [16]:
# Expect the average horsepower to fall between 50 and 100.
# NOTE(review): the output recorded for this cell reports a mean of about 104.12,
# above max_value, so this expectation fails — confirm whether the bound should be
# widened or the failure is intentional.
validator.expect_column_mean_to_be_between(
    column="horsepower",
    min_value=50,
    max_value=100,
)

Calculating Metrics:  33%|███▎      | 2/6 [00:00<00:00, 399.93it/s] 

Calculating Metrics:  83%|████████▎ | 5/6 [00:00<00:00, 263.16it/s]Using lossy conversion for decimal 104.1170731707317073 to float object to support serialization.
Calculating Metrics: 100%|██████████| 6/6 [00:01<00:00,  4.41it/s] 


{
  "result": {
    "observed_value": 104.1170731707317
  },
  "success": false,
  "exception_info": {
    "raised_exception": false,
    "exception_traceback": null,
    "exception_message": null
  },
  "meta": {}
}

In [17]:
# Persist the suite built above. discard_failed_expectations=False keeps the
# expectations that did not pass on this batch in the saved suite.
validator.save_expectation_suite(discard_failed_expectations=False)

In [18]:
my_checkpoint_name = "hacktiv8_cars_checkpoint"

# Actions attached to the checkpoint, in order: store the validation result, then
# update the Data Docs.
checkpoint_actions = [
    {
        "name": "store_validation_result",
        "action": {"class_name": "StoreValidationResultAction"},
    },
    {
        "name": "update_data_docs",
        "action": {"class_name": "UpdateDataDocsAction"},
    },
]

# The checkpoint pairs the car_assignment batch request with the expectation suite
# and names each run from the strftime-style template.
checkpoint = Checkpoint(
    name=my_checkpoint_name,
    run_name_template="%Y%m%d-%H%M%S-my-run-name-template",
    data_context=context,
    batch_request=batch_request,
    expectation_suite_name=expectation_suite_name,
    action_list=checkpoint_actions,
)

In [19]:
# Register the checkpoint with the context: adds it, or updates an existing
# checkpoint of the same name. The returned checkpoint config is displayed.
context.add_or_update_checkpoint(checkpoint=checkpoint)

{
  "action_list": [
    {
      "name": "store_validation_result",
      "action": {
        "class_name": "StoreValidationResultAction"
      }
    },
    {
      "name": "update_data_docs",
      "action": {
        "class_name": "UpdateDataDocsAction"
      }
    }
  ],
  "batch_request": {
    "datasource_name": "pg_datasource",
    "data_asset_name": "car_assignment",
    "options": {}
  },
  "class_name": "Checkpoint",
  "config_version": 1.0,
  "evaluation_parameters": {},
  "expectation_suite_name": "hacktiv8_car_assignment",
  "module_name": "great_expectations.checkpoint",
  "name": "hacktiv8_cars_checkpoint",
  "profilers": [],
  "run_name_template": "%Y%m%d-%H%M%S-my-run-name-template",
  "runtime_configuration": {},
  "validations": []
}

In [20]:
# Run the checkpoint against the configured batch request and expectation suite.
# NOTE(review): checkpoint_result is not inspected in the cells visible here —
# consider checking checkpoint_result.success before relying on the run.
checkpoint_result = checkpoint.run()

Calculating Metrics:  33%|███▎      | 12/36 [00:00<00:00, 31.25it/s]Using lossy conversion for decimal 104.1170731707317073 to float object to support serialization.
Calculating Metrics: 100%|██████████| 36/36 [00:00<00:00, 113.92it/s]


In [21]:
# Open the Data Docs for this context (presumably in the default browser) to
# review the validation results produced by the checkpoint run.
context.open_data_docs()