# Data Context

In [9]:
# Create a file-backed Great Expectations data context.
# `project_root_dir='./'` makes the current working directory the project root.

from great_expectations.data_context import FileDataContext

context = FileDataContext.create(project_root_dir='./')

# Connect data source

In [10]:
# Register a pandas datasource. Datasource names must be unique within the
# context, so use the add_or_update_* variant (as the suite and checkpoint
# cells already do): re-running this cell then replaces the stored datasource
# instead of failing because the name is already taken.
datasource_name = 'csv-smartphones-sales'
datasource = context.sources.add_or_update_pandas(name=datasource_name)

# Register the cleaned CSV file as a data asset of that datasource.
asset_name = 'smartphones-sales'
path_to_data = 'hafidz_clean.csv'
asset = datasource.add_csv_asset(asset_name, filepath_or_buffer=path_to_data)

# Build the batch request used later to create the validator.
batch_request = asset.build_batch_request()

# Create an expectation

In [28]:
# Register the expectation suite under a fixed name (add-or-update by name).
expectation_suite_name = 'expectation-smartphones-dataset'
context.add_or_update_expectation_suite(expectation_suite_name)

# Build a validator that ties the batch request to the suite above.
validator = context.get_validator(
    batch_request=batch_request,
    expectation_suite_name=expectation_suite_name,
)

# Preview the first rows of the batch as a sanity check.
validator.head()

Calculating Metrics:   0%|          | 0/1 [00:00<?, ?it/s]

Unnamed: 0,brands,colors,memory,storage,camera,rating,selling_price,original_price,mobile,discount,discount_percentage,id
0,SAMSUNG,Mirage Black,8 GB,128 GB,Yes,4.3,19330,20999,SAMSUNG GALAXY M31S,1669,7.947998,1
1,Nokia,Steel,2 GB,16 GB,Yes,3.8,10199,10199,Nokia 3.2,0,0.0,2
2,Infinix,Ice Blue,4 GB,64 GB,Yes,4.2,12999,12999,Infinix Note 5,0,0.0,3
3,Apple,Black,4GB,64 GB,Yes,4.6,49900,49900,Apple iPhone 11,0,0.0,4
4,GIONEE,Black,8 MB,16 MB,Yes,4.0,2199,2199,GIONEE L800,0,0.0,5


## Expect column values to be unique

In [33]:
# Expectation 1: every value in column `id` must be unique (no duplicates).

validator.expect_column_values_to_be_unique(column='id')

Calculating Metrics:   0%|          | 0/8 [00:00<?, ?it/s]

{
  "success": true,
  "result": {
    "element_count": 2806,
    "unexpected_count": 0,
    "unexpected_percent": 0.0,
    "partial_unexpected_list": [],
    "missing_count": 0,
    "missing_percent": 0.0,
    "unexpected_percent_total": 0.0,
    "unexpected_percent_nonmissing": 0.0
  },
  "meta": {},
  "exception_info": {
    "raised_exception": false,
    "exception_traceback": null,
    "exception_message": null
  }
}

## Expect column values to be between min_value and max_value

In [44]:
# Expectation 2: values in column `rating` must lie between 1 and 5.
validator.expect_column_values_to_be_between(
    column='rating',
    min_value=1,
    max_value=5,
)

Calculating Metrics:   0%|          | 0/8 [00:00<?, ?it/s]

{
  "success": true,
  "result": {
    "element_count": 2806,
    "unexpected_count": 0,
    "unexpected_percent": 0.0,
    "partial_unexpected_list": [],
    "missing_count": 0,
    "missing_percent": 0.0,
    "unexpected_percent_total": 0.0,
    "unexpected_percent_nonmissing": 0.0
  },
  "meta": {},
  "exception_info": {
    "raised_exception": false,
    "exception_traceback": null,
    "exception_message": null
  }
}

## Expect column values to be in set

In [45]:
# Expectation 3: column `brands` may only contain one of the brand names
# listed below.

validator.expect_column_values_to_be_in_set(
    column='brands',
    value_set=[
        'SAMSUNG', 'Nokia', 'Infinix', 'Apple', 'GIONEE', 'Xiaomi',
        'realme', 'OPPO', 'vivo', 'Lenovo', 'ASUS', 'Motorola', 'HTC',
        'Google Pixel', 'LG', 'POCO', 'IQOO',
    ],
)

Calculating Metrics:   0%|          | 0/8 [00:00<?, ?it/s]

{
  "success": true,
  "result": {
    "element_count": 2806,
    "unexpected_count": 0,
    "unexpected_percent": 0.0,
    "partial_unexpected_list": [],
    "missing_count": 0,
    "missing_percent": 0.0,
    "unexpected_percent_total": 0.0,
    "unexpected_percent_nonmissing": 0.0
  },
  "meta": {},
  "exception_info": {
    "raised_exception": false,
    "exception_traceback": null,
    "exception_message": null
  }
}

## Expect column values to be in type list

In [46]:
# Expectation 4: values in column `colors` must be of string type.

validator.expect_column_values_to_be_in_type_list(
    column='colors',
    type_list=['str'],
)

Calculating Metrics:   0%|          | 0/8 [00:00<?, ?it/s]

{
  "success": true,
  "result": {
    "element_count": 2806,
    "unexpected_count": 0,
    "unexpected_percent": 0.0,
    "partial_unexpected_list": [],
    "missing_count": 0,
    "missing_percent": 0.0,
    "unexpected_percent_total": 0.0,
    "unexpected_percent_nonmissing": 0.0
  },
  "meta": {},
  "exception_info": {
    "raised_exception": false,
    "exception_traceback": null,
    "exception_message": null
  }
}

## Expect the table row count to be between min_value and max_value

In [47]:
# Expectation 5: the table must contain between 1 and 5000 rows.

validator.expect_table_row_count_to_be_between(
    min_value=1,
    max_value=5000,
)

Calculating Metrics:   0%|          | 0/1 [00:00<?, ?it/s]

{
  "success": true,
  "result": {
    "observed_value": 2806
  },
  "meta": {},
  "exception_info": {
    "raised_exception": false,
    "exception_traceback": null,
    "exception_message": null
  }
}

## Expect column value lengths to be between min_value and max_value

In [48]:
# Expectation 6: values in column `mobile` must be 5 to 50 characters long.

validator.expect_column_value_lengths_to_be_between(
    column='mobile',
    min_value=5,
    max_value=50,
)

Calculating Metrics:   0%|          | 0/9 [00:00<?, ?it/s]

{
  "success": true,
  "result": {
    "element_count": 2806,
    "unexpected_count": 0,
    "unexpected_percent": 0.0,
    "partial_unexpected_list": [],
    "missing_count": 0,
    "missing_percent": 0.0,
    "unexpected_percent_total": 0.0,
    "unexpected_percent_nonmissing": 0.0
  },
  "meta": {},
  "exception_info": {
    "raised_exception": false,
    "exception_traceback": null,
    "exception_message": null
  }
}

In [49]:
# Expectation 7: column `memory` must not contain missing (null) values.

validator.expect_column_values_to_not_be_null(column='memory')

Calculating Metrics:   0%|          | 0/6 [00:00<?, ?it/s]

{
  "success": true,
  "result": {
    "element_count": 2806,
    "unexpected_count": 0,
    "unexpected_percent": 0.0,
    "partial_unexpected_list": []
  },
  "meta": {},
  "exception_info": {
    "raised_exception": false,
    "exception_traceback": null,
    "exception_message": null
  }
}

# Checkpoints

Checkpoint dalam **Great Expectations** berfungsi untuk menjalankan dan menyimpan hasil validasi data secara otomatis. Dengan checkpoint, Anda dapat mengelola, menjalankan, dan mereplikasi validasi pada dataset secara konsisten tanpa harus mendefinisikan ulang aturan validasi setiap kali.

In [51]:
# Persist the expectations recorded on the validator into the suite first;
# otherwise the stored suite may not contain them when the checkpoint (and
# later the Data Docs) read it. Keep every expectation, including any that
# failed, so the stored suite mirrors what was declared in this notebook.
validator.save_expectation_suite(discard_failed_expectations=False)

# Register the checkpoint (add-or-update by name) for this validator,
# then run it and keep the run result.
checkpoint = context.add_or_update_checkpoint(
    name='checkpoint',
    validator=validator,
)
result = checkpoint.run()

Calculating Metrics: 0it [00:00, ?it/s]

# Data Docs

**Data Docs** dalam Great Expectations adalah laporan visual otomatis yang menampilkan hasil validasi data. Data Docs membantu memeriksa apakah data memenuhi ekspektasi yang telah ditetapkan, dan memuat ringkasan validasi, detail ekspektasi, serta status setiap validasi (true/false).

In [52]:
# Build the Data Docs site. The returned dict (displayed below) maps the
# site name to the URL of its generated index.html.

context.build_data_docs()

{'local_site': 'file://d:\\Hacktiv8\\Percobaan milestone\\p-2\\cobam\\dags\\gx\\uncommitted/data_docs/local_site/index.html'}