In [1]:
!pip install cuallee
!pip install duckdb



# Experiments with Cuallee
## 1. Example using Pandas

In [2]:
import pandas as pd
import numpy as np
# Six-row sales sample that contains the problems the checks below look for:
# a missing transaction_id, a repeated transaction_id (102), a negative
# amount, and a payment type ("Crypto") other than Card/Cash.
data = dict(
    transaction_id=[101, 102, 103, np.nan, 105, 102],
    store_location=["Paris", "London", "Berlin", "Paris", "Unknown", "Bogota"],
    amount=[50.5, 20.0, -15.0, 100.0, 60.0, 10],
    payment_type=["Card", "Cash", "Card", "Crypto", "Cash", "Card"],
)

# One DataFrame column per key of `data`, in the order given above.
sales_data = pd.DataFrame(data=data)

In [3]:
# Display the whole sample frame (six rows) so the dirty rows are visible.
sales_data

Unnamed: 0,transaction_id,store_location,amount,payment_type
0,101.0,Paris,50.5,Card
1,102.0,London,20.0,Cash
2,103.0,Berlin,-15.0,Card
3,,Paris,100.0,Crypto
4,105.0,Unknown,60.0,Cash
5,102.0,Bogota,10.0,Card


In [4]:
from cuallee import Check, CheckLevel

# WARNING-level check that groups every rule applied to the pandas frame.
check = Check(CheckLevel.WARNING, "Sales_Data_Validation")

# Step 1: attach the rules (one per line, evaluated together in step 2).
pandas_rules = (
    check
    .is_complete("transaction_id")             # every row has an id
    .is_unique("transaction_id")               # no id appears twice
    .is_positive("amount")                     # reported as is_greater_than 0
    .is_in("payment_type", ["Card", "Cash"])   # only these payment types
    .is_legit("store_location")                # reported with pattern ^\S+$
)

# Step 2: run the rules against the DataFrame; the result has one row per rule.
results_exp1 = pandas_rules.validate(sales_data)

In [5]:
# Display the validation summary returned by `validate` for the pandas run
# (one row per rule: violations, pass_rate, status).
results_exp1

Unnamed: 0,id,timestamp,check,level,column,rule,value,rows,violations,pass_rate,pass_threshold,status
0,1,2025-12-09 22:07:23,Sales_Data_Validation,WARNING,transaction_id,is_complete,,6,1,0.833333,1.0,FAIL
1,2,2025-12-09 22:07:23,Sales_Data_Validation,WARNING,transaction_id,is_unique,,6,2,0.666667,1.0,FAIL
2,3,2025-12-09 22:07:23,Sales_Data_Validation,WARNING,amount,is_greater_than,0,6,1,0.833333,1.0,FAIL
3,4,2025-12-09 22:07:23,Sales_Data_Validation,WARNING,payment_type,is_in,"(Card, Cash)",6,1,0.833333,1.0,FAIL
4,5,2025-12-09 22:07:23,Sales_Data_Validation,WARNING,store_location,is_legit,^\S+$,6,0,1.0,1.0,PASS


## 2. Example using DuckDB

In [6]:
import duckdb
# In-memory DuckDB database used only for this example.
conn = duckdb.connect(":memory:")

# Materialise the sales rows as a DuckDB table. The query selects from
# `sales_data`, the pandas DataFrame built earlier in the notebook, which
# DuckDB's Python client can query by variable name.
create_table_sql = "CREATE OR REPLACE TABLE duckdb_sales_data AS (SELECT * FROM sales_data)"
conn.sql(create_table_sql)

# Same rule set as the pandas example, but bound to the DuckDB table by name.
check = Check(CheckLevel.WARNING, "DuckDB", table_name="duckdb_sales_data")

# Step 1: attach the rules.
duckdb_rules = (
    check
    .is_complete("transaction_id")             # every row has an id
    .is_unique("transaction_id")               # no id appears twice
    .is_positive("amount")                     # reported as is_greater_than 0
    .is_in("payment_type", ["Card", "Cash"])   # only these payment types
    .is_legit("store_location")                # reported with pattern ^\S+$
)

# Step 2: validate against the connection rather than a DataFrame.
results_exp2 = duckdb_rules.validate(conn)

In [7]:
# Display the validation summary returned by `validate` for the DuckDB run
# (one row per rule: violations, pass_rate, status).
results_exp2

Unnamed: 0,id,timestamp,check,level,column,rule,value,rows,violations,pass_rate,pass_threshold,status
0,1,2025-12-09 22:07:23,DuckDB,WARNING,transaction_id,is_complete,,6,1.0,0.833333,1.0,FAIL
1,2,2025-12-09 22:07:23,DuckDB,WARNING,transaction_id,is_unique,,6,2.0,0.666667,1.0,FAIL
2,3,2025-12-09 22:07:23,DuckDB,WARNING,amount,is_greater_than,0,6,1.0,0.833333,1.0,FAIL
3,4,2025-12-09 22:07:23,DuckDB,WARNING,payment_type,is_in,"(Card, Cash)",6,1.0,0.833333,1.0,FAIL
4,5,2025-12-09 22:07:23,DuckDB,WARNING,store_location,is_legit,^\S+$,6,0.0,1.0,1.0,PASS
