## Import Libraries

In [1]:
from great_expectations.data_context import FileDataContext

## Instantiate Data Context

In [2]:
# Crate data context
context = FileDataContext.create(project_root_dir='./')

## Connect to A `Datasource`

In [None]:
# Give a name to a Datasource. This name must be unique between Datasources.
datasource_name = 'automotives_sales_datasource'
datasource = context.sources.add_pandas(datasource_name)

# Give a name to a data asset
asset_name = 'auto_sales_data'
path_to_data = 'dags/data_clean.csv'
asset = datasource.add_csv_asset(asset_name, filepath_or_buffer=path_to_data)

# Build batch request
batch_request = asset.build_batch_request()

## Expectation Suite

In [None]:
# Creat an expectation suite
expectation_suite_name = 'automotives_sales_expectation'
context.add_or_update_expectation_suite(expectation_suite_name)

# Create a validator using above expectation suite
validator = context.get_validator(
    batch_request = batch_request,
    expectation_suite_name = expectation_suite_name
)

# Check the validator
validator.head()

Calculating Metrics:   0%|          | 0/1 [00:00<?, ?it/s]

Unnamed: 0,ordernumber,quantityordered,priceeach,orderlinenumber,sales,orderdate,days_since_lastorder,status,productline,msrp,...,customername,phone,addressline,city,postalcode,country,contactlastname,contactfirstname,dealsize,order_product
0,10107,30,95.7,2,2871.0,2018-02-24,828,Shipped,Motorcycles,95,...,Land of Toys Inc.,2125557818,897 Long Airport Avenue,NYC,10022,USA,Yu,Kwai,Small,10107_S10_1678
1,10121,34,81.35,5,2765.9,2018-05-07,757,Shipped,Motorcycles,95,...,Reims Collectables,26.47.1555,59 rue de l'Abbaye,Reims,51100,France,Henriot,Paul,Small,10121_S10_1678
2,10134,41,94.74,2,3884.34,2018-07-01,703,Shipped,Motorcycles,95,...,Lyon Souveniers,+33 1 46 62 7555,27 rue du Colonel Pierre Avia,Paris,75508,France,Da Cunha,Daniel,Medium,10134_S10_1678
3,10145,45,83.26,6,3746.7,2018-08-25,649,Shipped,Motorcycles,95,...,Toys4GrownUps.com,6265557265,78934 Hillside Dr.,Pasadena,90003,USA,Young,Julie,Medium,10145_S10_1678
4,10168,36,96.66,1,3479.76,2018-10-28,586,Shipped,Motorcycles,95,...,Technics Stores Inc.,6505556809,9408 Furth Circle,Burlingame,94217,USA,Hirano,Juri,Medium,10168_S10_1678


### a. Expectations

#### 1. To Be Unique

In [14]:
# Expectation 1: Kolom 'order_product' To be unique 

validator.expect_column_values_to_be_unique('order_product')

Calculating Metrics:   0%|          | 0/8 [00:00<?, ?it/s]

{
  "success": true,
  "result": {
    "element_count": 2747,
    "unexpected_count": 0,
    "unexpected_percent": 0.0,
    "partial_unexpected_list": [],
    "missing_count": 0,
    "missing_percent": 0.0,
    "unexpected_percent_total": 0.0,
    "unexpected_percent_nonmissing": 0.0
  },
  "meta": {},
  "exception_info": {
    "raised_exception": false,
    "exception_traceback": null,
    "exception_message": null
  }
}

#### 2. To Be Between

In [18]:
# Expectation 2: Kolom 'quantityordered' to be between 5 and 100

validator.expect_column_values_to_be_between('quantityordered',
                                            min_value=5,
                                            max_value=100)

Calculating Metrics:   0%|          | 0/8 [00:00<?, ?it/s]

{
  "success": true,
  "result": {
    "element_count": 2747,
    "unexpected_count": 0,
    "unexpected_percent": 0.0,
    "partial_unexpected_list": [],
    "missing_count": 0,
    "missing_percent": 0.0,
    "unexpected_percent_total": 0.0,
    "unexpected_percent_nonmissing": 0.0
  },
  "meta": {},
  "exception_info": {
    "raised_exception": false,
    "exception_traceback": null,
    "exception_message": null
  }
}

#### 3. To Be In Set

In [None]:
# Expectation 3: Kolom 'productline' to be in set

validator.expect_column_values_to_be_in_set('productline',
                                            value_set=['Motorcycles',
                                                       'Classic Cars',
                                                       'Trucks and Buses',
                                                       'Vintage Cars',
                                                       'Planes',
                                                       'Ships',
                                                       'Trains'])

Calculating Metrics:   0%|          | 0/8 [00:00<?, ?it/s]

{
  "success": true,
  "result": {
    "element_count": 2747,
    "unexpected_count": 0,
    "unexpected_percent": 0.0,
    "partial_unexpected_list": [],
    "missing_count": 0,
    "missing_percent": 0.0,
    "unexpected_percent_total": 0.0,
    "unexpected_percent_nonmissing": 0.0
  },
  "meta": {},
  "exception_info": {
    "raised_exception": false,
    "exception_traceback": null,
    "exception_message": null
  }
}

#### 4. To Be In Type List

In [21]:
# Expectation 4: Kolom 'priceeach' to be in type list

validator.expect_column_values_to_be_in_type_list('priceeach',
                                                type_list=['int',
                                                           'float',
                                                           'int64',
                                                           'float64']) 

Calculating Metrics:   0%|          | 0/1 [00:00<?, ?it/s]

{
  "success": true,
  "result": {
    "observed_value": "float64"
  },
  "meta": {},
  "exception_info": {
    "raised_exception": false,
    "exception_traceback": null,
    "exception_message": null
  }
}

#### 5. To Be Greater Than

In [None]:
# Expectation 5: Kolom 'sales' to be greater than 'priceeach'

validator.expect_column_pair_values_A_to_be_greater_than_B(column_A="sales",
                                                        column_B="priceeach")

Calculating Metrics:   0%|          | 0/7 [00:00<?, ?it/s]

{
  "success": true,
  "result": {
    "element_count": 2747,
    "unexpected_count": 0,
    "unexpected_percent": 0.0,
    "partial_unexpected_list": [],
    "missing_count": 0,
    "missing_percent": 0.0,
    "unexpected_percent_total": 0.0,
    "unexpected_percent_nonmissing": 0.0
  },
  "meta": {},
  "exception_info": {
    "raised_exception": false,
    "exception_traceback": null,
    "exception_message": null
  }
}

#### 6. To Match Regex Pattern

In [None]:
# Expectation 6: Kolom 'productcode' to match regex pattern

validator.expect_column_values_to_match_regex(column="productcode",
                                            regex=r"^[A-Z0-9_]+$")

Calculating Metrics:   0%|          | 0/8 [00:00<?, ?it/s]

{
  "success": true,
  "result": {
    "element_count": 2747,
    "unexpected_count": 0,
    "unexpected_percent": 0.0,
    "partial_unexpected_list": [],
    "missing_count": 0,
    "missing_percent": 0.0,
    "unexpected_percent_total": 0.0,
    "unexpected_percent_nonmissing": 0.0
  },
  "meta": {},
  "exception_info": {
    "raised_exception": false,
    "exception_traceback": null,
    "exception_message": null
  }
}

#### 7. Columns To Be Match Set 

In [None]:
# Expectation 7: Semua kolom Match dengan Set

validator.expect_table_columns_to_match_set(column_set=["ordernumber","quantityordered","priceeach",
                                                        "orderlinenumber","sales","orderdate",
                                                        "days_since_lastorder","status","productline",
                                                        "msrp","productcode","customername","phone",
                                                        "addressline","city","postalcode","country",
                                                        "contactlastname","contactfirstname","dealsize",
                                                        "order_product"])

Calculating Metrics:   0%|          | 0/2 [00:00<?, ?it/s]

{
  "success": true,
  "result": {
    "observed_value": [
      "ordernumber",
      "quantityordered",
      "priceeach",
      "orderlinenumber",
      "sales",
      "orderdate",
      "days_since_lastorder",
      "status",
      "productline",
      "msrp",
      "productcode",
      "customername",
      "phone",
      "addressline",
      "city",
      "postalcode",
      "country",
      "contactlastname",
      "contactfirstname",
      "dealsize",
      "order_product"
    ]
  },
  "meta": {},
  "exception_info": {
    "raised_exception": false,
    "exception_traceback": null,
    "exception_message": null
  }
}

### b. Checkpoint

In [48]:
# Create a checkpoint

checkpoint_m3 = context.add_or_update_checkpoint(
    name = 'project_m3_checkpoint',
    validator = validator,
)

In [49]:
# Run a checkpoint

checkpoint_result = checkpoint_m3.run()

Calculating Metrics: 0it [00:00, ?it/s]

### c. Data Docs

In [50]:
# Build data docs

context.build_data_docs()

{'local_site': 'file://e:\\Latihan\\project_m3\\gx\\uncommitted/data_docs/local_site/index.html'}