# **Great Expectations**

Nama: Elia Oktaviani

Informasi: Pengerjaan dilakukan di Google Colab, kemudian notebook diunduh untuk di-push ke GitHub.

## Install Library

In [None]:
# Install the library

!pip install -q great-expectations

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m5.4/5.4 MB[0m [31m35.7 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m49.4/49.4 kB[0m [31m3.8 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m109.1/109.1 kB[0m [31m4.6 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.6/1.6 MB[0m [31m40.8 MB/s[0m eta [36m0:00:00[0m
[?25h

## Import Library and Create Data Context

In [None]:
# Set up a file-backed Data Context rooted at the current working directory

from great_expectations.data_context import FileDataContext

context = FileDataContext.create(
    project_root_dir='./',
)

## Prepare data to be processed

In [None]:
# Register a pandas Datasource. Datasource names must be unique within the
# Data Context, and the file-backed context persists them between runs, so
# `add_or_update_pandas` is used to keep this cell re-runnable when a
# Datasource called 'milestone3' has already been saved.
datasource_name = 'milestone3'
datasource = context.sources.add_or_update_pandas(datasource_name)

# Register the CSV file as a data asset of that Datasource
asset_name = 'customer-behaviour'
path_to_data = 'cleancustomer.csv'
asset = datasource.add_csv_asset(asset_name, filepath_or_buffer=path_to_data)

# Build the batch request used by the validator in the next cell
batch_request = asset.build_batch_request()

In [None]:
# Create (or update) the expectation suite that collects the checks below
expectation_suite_name = 'expectation-customer-behaviour'
context.add_or_update_expectation_suite(expectation_suite_name)

# Tie the batch request and the suite together through a validator
validator = context.get_validator(
    batch_request=batch_request,
    expectation_suite_name=expectation_suite_name,
)

# Preview the first rows to confirm the batch was loaded
validator.head()




Calculating Metrics:   0%|          | 0/1 [00:00<?, ?it/s]

Unnamed: 0.1,Unnamed: 0,index,date,year,month,customer_age,customer_gender,country,state,product_category,sub_category,quantity,unit_cost,unit_price,cost,revenue
0,0,0,02/19/16,2016,February,29,F,United States,Washington,Accessories,Tires and Tubes,1.0,80.0,109.0,80.0,109.0
1,1,1,02/20/16,2016,February,29,F,United States,Washington,Clothing,Gloves,2.0,24.5,28.5,49.0,57.0
2,2,2,02/27/16,2016,February,29,F,United States,Washington,Accessories,Tires and Tubes,3.0,3.67,5.0,11.0,15.0
3,3,3,03/12/16,2016,March,29,F,United States,Washington,Accessories,Tires and Tubes,2.0,87.5,116.5,175.0,233.0
4,4,4,03/12/16,2016,March,29,F,United States,Washington,Accessories,Tires and Tubes,3.0,35.0,41.666667,105.0,125.0


## Check Expectation

In [None]:
# Expectation 1 : every value in column `index` must be unique

validator.expect_column_values_to_be_unique(column='index')




Calculating Metrics:   0%|          | 0/8 [00:00<?, ?it/s]

{
  "success": true,
  "expectation_config": {
    "expectation_type": "expect_column_values_to_be_unique",
    "kwargs": {
      "column": "index",
      "batch_id": "milestone3-customer-behaviour"
    },
    "meta": {}
  },
  "result": {
    "element_count": 34866,
    "unexpected_count": 0,
    "unexpected_percent": 0.0,
    "partial_unexpected_list": [],
    "missing_count": 0,
    "missing_percent": 0.0,
    "unexpected_percent_total": 0.0,
    "unexpected_percent_nonmissing": 0.0
  },
  "meta": {},
  "exception_info": {
    "raised_exception": false,
    "exception_traceback": null,
    "exception_message": null
  }
}

In [None]:
# Expectation 2 : values in column `year` must lie between 2015 and 2016
validator.expect_column_values_to_be_between('year', min_value=2015, max_value=2016)

Calculating Metrics:   0%|          | 0/8 [00:00<?, ?it/s]

{
  "success": true,
  "expectation_config": {
    "expectation_type": "expect_column_values_to_be_between",
    "kwargs": {
      "column": "year",
      "min_value": 2015,
      "max_value": 2016,
      "batch_id": "milestone3-customer-behaviour"
    },
    "meta": {}
  },
  "result": {
    "element_count": 34866,
    "unexpected_count": 0,
    "unexpected_percent": 0.0,
    "partial_unexpected_list": [],
    "missing_count": 0,
    "missing_percent": 0.0,
    "unexpected_percent_total": 0.0,
    "unexpected_percent_nonmissing": 0.0
  },
  "meta": {},
  "exception_info": {
    "raised_exception": false,
    "exception_traceback": null,
    "exception_message": null
  }
}

In [None]:
# Expectation 3 : column `customer_gender` may only hold one of two codes:
#   F = Female
#   M = Male

validator.expect_column_values_to_be_in_set(
    column='customer_gender',
    value_set=["F", "M"],
)

Calculating Metrics:   0%|          | 0/8 [00:00<?, ?it/s]

See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  common = np.find_common_type([values.dtype, comps_array.dtype], [])



{
  "success": true,
  "expectation_config": {
    "expectation_type": "expect_column_values_to_be_in_set",
    "kwargs": {
      "column": "customer_gender",
      "value_set": [
        "F",
        "M"
      ],
      "batch_id": "milestone3-customer-behaviour"
    },
    "meta": {}
  },
  "result": {
    "element_count": 34866,
    "unexpected_count": 0,
    "unexpected_percent": 0.0,
    "partial_unexpected_list": [],
    "missing_count": 0,
    "missing_percent": 0.0,
    "unexpected_percent_total": 0.0,
    "unexpected_percent_nonmissing": 0.0
  },
  "meta": {},
  "exception_info": {
    "raised_exception": false,
    "exception_traceback": null,
    "exception_message": null
  }
}

In [None]:
# Expectation 4 : Column `cost` must be stored as an integer or float dtype
#
# The type names must be ones the pandas execution engine can resolve
# (NumPy dtype names / Python type names). 'integer' is not such a name and
# is silently ignored, so an integer-typed column would have failed the
# original check; 'int' / 'int64' cover that case explicitly.

validator.expect_column_values_to_be_in_type_list(
    'cost', ['int', 'int64', 'float', 'float64']
)

Calculating Metrics:   0%|          | 0/1 [00:00<?, ?it/s]

{
  "success": true,
  "expectation_config": {
    "expectation_type": "expect_column_values_to_be_in_type_list",
    "kwargs": {
      "column": "cost",
      "type_list": [
        "integer",
        "float"
      ],
      "batch_id": "milestone3-customer-behaviour"
    },
    "meta": {}
  },
  "result": {
    "observed_value": "float64"
  },
  "meta": {},
  "exception_info": {
    "raised_exception": false,
    "exception_traceback": null,
    "exception_message": null
  }
}

In [None]:
# Expectation 5 : Column `date` must be in date format MM/DD/YY (e.g. 02/19/16)
#
# - A raw string is used so `\d` is not treated as an (invalid) string escape.
# - `^` and `$` anchor the pattern so the whole value has to match; without
#   them a longer value such as 02/19/2016 would also pass on a partial match.

validator.expect_column_values_to_match_regex("date", r"^\d{2}/\d{2}/\d{2}$")



  validator.expect_column_values_to_match_regex("date", "\d{2}/\d{2}/\d{2}")



Calculating Metrics:   0%|          | 0/8 [00:00<?, ?it/s]

{
  "success": true,
  "expectation_config": {
    "expectation_type": "expect_column_values_to_match_regex",
    "kwargs": {
      "column": "date",
      "regex": "\\d{2}/\\d{2}/\\d{2}",
      "batch_id": "milestone3-customer-behaviour"
    },
    "meta": {}
  },
  "result": {
    "element_count": 34866,
    "unexpected_count": 0,
    "unexpected_percent": 0.0,
    "partial_unexpected_list": [],
    "missing_count": 0,
    "missing_percent": 0.0,
    "unexpected_percent_total": 0.0,
    "unexpected_percent_nonmissing": 0.0
  },
  "meta": {},
  "exception_info": {
    "raised_exception": false,
    "exception_traceback": null,
    "exception_message": null
  }
}

In [None]:
# Expectation 6 : the smallest value in column `quantity` must be at least 1

validator.expect_column_min_to_be_between(column="quantity", min_value=1)

Calculating Metrics:   0%|          | 0/4 [00:00<?, ?it/s]

{
  "success": true,
  "expectation_config": {
    "expectation_type": "expect_column_min_to_be_between",
    "kwargs": {
      "column": "quantity",
      "min_value": 1,
      "batch_id": "milestone3-customer-behaviour"
    },
    "meta": {}
  },
  "result": {
    "observed_value": 1.0
  },
  "meta": {},
  "exception_info": {
    "raised_exception": false,
    "exception_traceback": null,
    "exception_message": null
  }
}

In [None]:
# Expectation 7 : Column `unit_cost` maximum value should be not greater than 3240
#
# The bound must be passed as `max_value`. Passed positionally, 3240 binds to
# `min_value`, which asserts the opposite (column max >= 3240) and would let
# any larger maximum through.

validator.expect_column_max_to_be_between("unit_cost", max_value=3240)




Calculating Metrics:   0%|          | 0/4 [00:00<?, ?it/s]

{
  "success": true,
  "expectation_config": {
    "expectation_type": "expect_column_max_to_be_between",
    "kwargs": {
      "column": "unit_cost",
      "min_value": 3240,
      "batch_id": "milestone3-customer-behaviour"
    },
    "meta": {}
  },
  "result": {
    "observed_value": 3240.0
  },
  "meta": {},
  "exception_info": {
    "raised_exception": false,
    "exception_traceback": null,
    "exception_message": null
  }
}