# 1. Import Libraries

In [2]:
import pandas as pd
import great_expectations as ge
from great_expectations.data_context import FileDataContext

# 2. Create Data Context, Datasource, Data Asset, and Connect to A `Datasource`

In [3]:
# Create a file-backed data context with the project root at the current directory
context = FileDataContext.create(project_root_dir='./')

# Give a name to a Datasource. This name must be unique between Datasources.
# Use add_or_update_pandas (not add_pandas) so that re-running this cell against
# the already-persisted project config does not fail because the name exists;
# this mirrors the add_or_update_* calls used for the suite and the checkpoint.
datasource_name = 'csv-milestone3'
datasource = context.sources.add_or_update_pandas(datasource_name)

# Give a name to a data asset and point it at the cleaned CSV file
asset_name = 'milestone3'
data = 'P2M3_muhammad_damar_data_clean.csv'
asset = datasource.add_csv_asset(asset_name, filepath_or_buffer=data)

# Build the batch request used later to fetch this asset's data for validation
batch_request = asset.build_batch_request()

Membuat data context, datasource dengan nama `csv-milestone3`, dan data asset dengan nama `milestone3` yang menunjuk ke file CSV hasil cleaning, lalu membuat batch request untuk mengambil data dari asset tersebut.

# 3. Create an Expectation Suite

In [4]:
# Register (add or update) the expectation suite under a fixed name
expectation_suite_name = 'expectation-supermarket-sales'
context.add_or_update_expectation_suite(expectation_suite_name=expectation_suite_name)

# Bind the batch request and the suite together in a validator
validator = context.get_validator(batch_request=batch_request, expectation_suite_name=expectation_suite_name)

# Preview the first rows seen by the validator
validator.head()

Calculating Metrics:   0%|          | 0/1 [00:00<?, ?it/s]

Unnamed: 0,invoice_id,branch,city,customer_type,gender,product_line,unit_price,quantity,tax_5percent,total,date,time,payment,cogs,gross_margin_percentage,gross_income,rating
0,750-67-8428,A,Yangon,Member,Female,Health and beauty,74.69,7,26.14,548.97,2019-01-05,13:08:00,Ewallet,522.83,4.76,26.14,9.1
1,226-31-3081,C,Naypyitaw,Normal,Female,Electronic accessories,15.28,5,3.82,80.22,2019-03-08,10:29:00,Cash,76.4,4.76,3.82,9.6
2,631-41-3108,A,Yangon,Normal,Male,Home and lifestyle,46.33,7,16.22,340.53,2019-03-03,13:23:00,Credit card,324.31,4.76,16.22,7.4
3,123-19-1176,A,Yangon,Member,Male,Health and beauty,58.22,8,23.29,489.05,2019-01-27,20:33:00,Ewallet,465.76,4.76,23.29,8.4
4,373-73-7910,A,Yangon,Normal,Male,Sports and travel,86.31,7,30.21,634.38,2019-02-08,10:37:00,Ewallet,604.17,4.76,30.21,5.3


Membuat expectation suite dan validatornya.

## 3.1. Expectations

### 3.1.1. Must Be Unique

In [5]:
# Expectation 1: values in column `invoice_id` must be unique

validator.expect_column_values_to_be_unique(column='invoice_id')

Calculating Metrics:   0%|          | 0/8 [00:00<?, ?it/s]

{
  "success": true,
  "result": {
    "element_count": 1000,
    "unexpected_count": 0,
    "unexpected_percent": 0.0,
    "partial_unexpected_list": [],
    "missing_count": 0,
    "missing_percent": 0.0,
    "unexpected_percent_total": 0.0,
    "unexpected_percent_nonmissing": 0.0
  },
  "meta": {},
  "exception_info": {
    "raised_exception": false,
    "exception_traceback": null,
    "exception_message": null
  }
}

Ekspektasi 1, kolom 'invoice_id' harus unique.

### 3.1.2. To Be Between min_value And max_value

In [7]:
# Expectation 2: values in column `quantity` must be between 1 and 100
validator.expect_column_values_to_be_between(
    column='quantity',
    min_value=1,
    max_value=100,
)

Calculating Metrics:   0%|          | 0/8 [00:00<?, ?it/s]

{
  "success": true,
  "result": {
    "element_count": 1000,
    "unexpected_count": 0,
    "unexpected_percent": 0.0,
    "partial_unexpected_list": [],
    "missing_count": 0,
    "missing_percent": 0.0,
    "unexpected_percent_total": 0.0,
    "unexpected_percent_nonmissing": 0.0
  },
  "meta": {},
  "exception_info": {
    "raised_exception": false,
    "exception_traceback": null,
    "exception_message": null
  }
}

Ekspektasi 2, nilai pada kolom 'quantity' harus berada di antara nilai minimum (1) dan maksimum (100).

### 3.1.3. To Be In Set

In [9]:
# Expectation 3: values in column `branch` must belong to the set {'A', 'B', 'C'}
validator.expect_column_values_to_be_in_set(column='branch', value_set=['A', 'B', 'C'])


Calculating Metrics:   0%|          | 0/8 [00:00<?, ?it/s]

{
  "success": true,
  "result": {
    "element_count": 1000,
    "unexpected_count": 0,
    "unexpected_percent": 0.0,
    "partial_unexpected_list": [],
    "missing_count": 0,
    "missing_percent": 0.0,
    "unexpected_percent_total": 0.0,
    "unexpected_percent_nonmissing": 0.0
  },
  "meta": {},
  "exception_info": {
    "raised_exception": false,
    "exception_traceback": null,
    "exception_message": null
  }
}

Ekspektasi 3, kolom 'branch' harus berada di set ['A', 'B', 'C'].

### 3.1.4. To Be Of Type

In [11]:
# Expectation 4: values in column `customer_type` must be of type 'str'
validator.expect_column_values_to_be_of_type(column='customer_type', type_='str')

Calculating Metrics:   0%|          | 0/8 [00:00<?, ?it/s]

{
  "success": true,
  "result": {
    "element_count": 1000,
    "unexpected_count": 0,
    "unexpected_percent": 0.0,
    "partial_unexpected_list": [],
    "missing_count": 0,
    "missing_percent": 0.0,
    "unexpected_percent_total": 0.0,
    "unexpected_percent_nonmissing": 0.0
  },
  "meta": {},
  "exception_info": {
    "raised_exception": false,
    "exception_traceback": null,
    "exception_message": null
  }
}

Ekspektasi 4, kolom 'customer_type' harus string tipe datanya.

### 3.1.5. No Null in 'total'

In [12]:
# Expectation 5: column `total` must not contain null values
validator.expect_column_values_to_not_be_null(column='total')

Calculating Metrics:   0%|          | 0/6 [00:00<?, ?it/s]

{
  "success": true,
  "result": {
    "element_count": 1000,
    "unexpected_count": 0,
    "unexpected_percent": 0.0,
    "partial_unexpected_list": []
  },
  "meta": {},
  "exception_info": {
    "raised_exception": false,
    "exception_traceback": null,
    "exception_message": null
  }
}

Ekspektasi 5, kolom 'total' tidak boleh ada nilai null.

### 3.1.6. Nilai Payment tidak boleh kosong

In [13]:
# Expectation 6: values in column `payment` must not be the empty string
# ('^$' matches only a zero-length string).
# NOTE(review): empty CSV fields are presumably read by pandas as NaN, and this
# expectation likely skips null values -- confirm, and consider also adding a
# not-null expectation on `payment` if missing values must be rejected.
validator.expect_column_values_to_not_match_regex(column='payment', regex='^$')

Calculating Metrics:   0%|          | 0/8 [00:00<?, ?it/s]

{
  "success": true,
  "result": {
    "element_count": 1000,
    "unexpected_count": 0,
    "unexpected_percent": 0.0,
    "partial_unexpected_list": [],
    "missing_count": 0,
    "missing_percent": 0.0,
    "unexpected_percent_total": 0.0,
    "unexpected_percent_nonmissing": 0.0
  },
  "meta": {},
  "exception_info": {
    "raised_exception": false,
    "exception_traceback": null,
    "exception_message": null
  }
}

Ekspektasi 6, nilai pada kolom 'payment' tidak boleh kosong.

### 3.1.7. Rata-rata Rating di Atas Nilai Minimum (expect_column_mean_to_be_between)

In [18]:
# Expectation 7: the mean of column `rating` must be between 5 and 10
validator.expect_column_mean_to_be_between(
    column='rating',
    min_value=5,
    max_value=10,
)

Calculating Metrics:   0%|          | 0/4 [00:00<?, ?it/s]

{
  "success": true,
  "result": {
    "observed_value": 6.9727
  },
  "meta": {},
  "exception_info": {
    "raised_exception": false,
    "exception_traceback": null,
    "exception_message": null
  }
}

Ekspektasi 7, nilai rata-rata dari kolom 'rating' harus berada di antara nilai minimum (5) dan maksimum (10).

In [19]:
# Save the expectations defined on the validator into the Expectation Suite.
# discard_failed_expectations=False: going by the parameter name, expectations
# that failed are not dropped on save -- confirm against the GX documentation.

validator.save_expectation_suite(discard_failed_expectations=False)

Menyimpan Expectationnya.

## 3.2. Checkpoint

In [20]:
# Register (add or update) a checkpoint named 'checkpoint_1' bound to the validator
checkpoint_1 = context.add_or_update_checkpoint(name='checkpoint_1', validator=validator)

Membuat checkpoint yang akan digunakan untuk menjalankan validasi data terhadap expectation suite.

# 4. Run Checkpoint

In [21]:
# Inspect the checkpoint object (prints its configuration)
print(checkpoint_1)

{
  "action_list": [
    {
      "name": "store_validation_result",
      "action": {
        "class_name": "StoreValidationResultAction"
      }
    },
    {
      "name": "store_evaluation_params",
      "action": {
        "class_name": "StoreEvaluationParametersAction"
      }
    },
    {
      "name": "update_data_docs",
      "action": {
        "class_name": "UpdateDataDocsAction"
      }
    }
  ],
  "batch_request": {},
  "class_name": "Checkpoint",
  "config_version": 1.0,
  "evaluation_parameters": {},
  "module_name": "great_expectations.checkpoint",
  "name": "checkpoint_1",
  "profilers": [],
  "runtime_configuration": {},
  "validations": [
    {
      "batch_request": {
        "datasource_name": "csv-milestone3",
        "data_asset_name": "milestone3",
        "options": {}
      },
      "expectation_suite_name": "expectation-supermarket-sales"
    }
  ]
}


In [22]:
# Run the checkpoint
checkpoint_result = checkpoint_1.run()
# Show the validation result
print(checkpoint_result)

Calculating Metrics:   0%|          | 0/44 [00:00<?, ?it/s]

{
  "run_id": {
    "run_name": null,
    "run_time": "2026-01-25T21:26:00.043098+07:00"
  },
  "run_results": {
    "ValidationResultIdentifier::expectation-supermarket-sales/__none__/20260125T142600.043098Z/csv-milestone3-milestone3": {
      "validation_result": {
        "success": true,
        "results": [
          {
            "success": true,
            "expectation_config": {
              "expectation_type": "expect_column_values_to_be_unique",
              "kwargs": {
                "column": "invoice_id",
                "batch_id": "csv-milestone3-milestone3"
              },
              "meta": {}
            },
            "result": {
              "element_count": 1000,
              "unexpected_count": 0,
              "unexpected_percent": 0.0,
              "partial_unexpected_list": [],
              "missing_count": 0,
              "missing_percent": 0.0,
              "unexpected_percent_total": 0.0,
              "unexpected_percent_nonmissing": 0.0,
    

Semua ekspektasi terpenuhi (`success: true`), sehingga dataset ini lolos 100% dari expectation yang telah didefinisikan.

# 5. Build Data Docs

In [23]:
# Build the Data Docs site; the returned mapping (shown as the cell output)
# gives the URL of each site's index.html

context.build_data_docs()

{'local_site': 'file://c:\\Users\\damar\\OneDrive\\Desktop\\Hacktiv8\\Phase-2\\MS\\p2-ftds-m3-damarnieh\\gx\\uncommitted/data_docs/local_site/index.html'}