# Pointblank DuckLake Example

An example notebook showing how to use the [Pointblank](https://posit-dev.github.io/pointblank/) data validation framework with DuckLake.

Install Pointblank with DuckDB and Polars extras:

In [None]:
!pip install "pointblank[duckdb,pl]"

Import the `ibis` and `pointblank` packages:

In [2]:
import ibis
import pointblank as pb

Connect to an in-memory DuckDB database and install the `ducklake` extension:

In [3]:
# `extensions` takes a sequence of extension names; pass a list so the single
# name "ducklake" is never treated as an iterable of characters.
con = ibis.duckdb.connect(extensions=["ducklake"])

Attach to DuckLake and select the `my_ducklake` catalog:

In [4]:
# Attach the DuckLake catalog. No explicit alias is passed here, so the
# following USE statement relies on the catalog being named `my_ducklake`.
con.attach("ducklake:my_ducklake.ducklake")
# Make `my_ducklake` the active catalog for the statements that follow.
con.raw_sql("USE my_ducklake")

<duckdb.duckdb.DuckDBPyConnection at 0x1342df870>

Create a table:

In [5]:
# Build the `titanic` table straight from the remote CSV, replacing the
# `Survived` column with a BOOLEAN cast of itself. `IF NOT EXISTS` makes the
# statement a no-op when the table is already present.
create_titanic_sql = """
CREATE TABLE IF NOT EXISTS titanic AS
    SELECT * REPLACE (Survived::BOOLEAN AS Survived)
    FROM 'https://raw.githubusercontent.com/datasciencedojo/datasets/refs/heads/master/titanic.csv'
"""
con.raw_sql(create_titanic_sql)

<duckdb.duckdb.DuckDBPyConnection at 0x1342df870>

Preview the table:

In [6]:
# Get a table expression for `titanic` from the connection; it is reused by
# the validation step later in the notebook.
titanic = con.table("titanic")
# Render a preview of the table as the cell output.
pb.preview(titanic)

DuckDBRows891Columns12,DuckDBRows891Columns12,DuckDBRows891Columns12,DuckDBRows891Columns12,DuckDBRows891Columns12,DuckDBRows891Columns12,DuckDBRows891Columns12,DuckDBRows891Columns12,DuckDBRows891Columns12,DuckDBRows891Columns12,DuckDBRows891Columns12,DuckDBRows891Columns12,DuckDBRows891Columns12
Unnamed: 0_level_1,PassengerIdint64,Survivedboolean,Pclassint64,Namestring,Sexstring,Agefloat64,SibSpint64,Parchint64,Ticketstring,Farefloat64,Cabinstring,Embarkedstring
1,1,False,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.25,,S
2,2,True,1,"Cumings, Mrs. John Bradley (Florence Briggs Thayer)",female,38.0,1,0,PC 17599,71.2833,C85,C
3,3,True,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.925,,S
4,4,True,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1,C123,S
5,5,False,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.05,,S
887,887,False,2,"Montvila, Rev. Juozas",male,27.0,0,0,211536,13.0,,S
888,888,True,1,"Graham, Miss. Margaret Edith",female,19.0,0,0,112053,30.0,B42,S
889,889,False,3,"Johnston, Miss. Catherine Helen ""Carrie""",female,,1,2,W./C. 6607,23.45,,S
890,890,True,1,"Behr, Mr. Karl Howell",male,26.0,0,0,111369,30.0,C148,C
891,891,False,3,"Dooley, Mr. Patrick",male,32.0,0,0,370376,7.75,,Q


Create a validation:

In [7]:
# Build a validation plan against the `titanic` table and run it immediately.
validation = (
    pb.Validate(
        data=titanic,
        tbl_name="titanic",                                  # Name of the table for reporting
        label="Example titanic dataset validation",          # Label for the validation, appears in reports
        thresholds=(0.01, 0.02, 0.05),                       # Thresholds for warnings, errors, and critical issues
        brief=True,                                          # Add automatically-generated briefs for each step
    )
    .col_vals_not_null(columns="PassengerId")                # Validate values not Null
    .col_exists(columns=["Name", "Ticket"])                  # Validate columns exist
    .col_vals_between(columns="Pclass", left=1, right=3)     # Validate 1 <= values <= 3
    .col_vals_in_set(columns="Sex", set=["male", "female"])  # Validate values "male" or "female"
    .col_vals_lt(columns="Age", value=60, na_pass=True)      # Validate values < 60, Null allowed
    .interrogate()                                           # Execute and collect results
)

# Display the validation report as the cell output.
validation

Pointblank Validation,Pointblank Validation,Pointblank Validation,Pointblank Validation,Pointblank Validation,Pointblank Validation,Pointblank Validation,Pointblank Validation,Pointblank Validation,Pointblank Validation,Pointblank Validation,Pointblank Validation,Pointblank Validation,Pointblank Validation
Example titanic dataset validationDuckDBtitanicWARNING0.01ERROR0.02CRITICAL0.05,Example titanic dataset validationDuckDBtitanicWARNING0.01ERROR0.02CRITICAL0.05,Example titanic dataset validationDuckDBtitanicWARNING0.01ERROR0.02CRITICAL0.05,Example titanic dataset validationDuckDBtitanicWARNING0.01ERROR0.02CRITICAL0.05,Example titanic dataset validationDuckDBtitanicWARNING0.01ERROR0.02CRITICAL0.05,Example titanic dataset validationDuckDBtitanicWARNING0.01ERROR0.02CRITICAL0.05,Example titanic dataset validationDuckDBtitanicWARNING0.01ERROR0.02CRITICAL0.05,Example titanic dataset validationDuckDBtitanicWARNING0.01ERROR0.02CRITICAL0.05,Example titanic dataset validationDuckDBtitanicWARNING0.01ERROR0.02CRITICAL0.05,Example titanic dataset validationDuckDBtitanicWARNING0.01ERROR0.02CRITICAL0.05,Example titanic dataset validationDuckDBtitanicWARNING0.01ERROR0.02CRITICAL0.05,Example titanic dataset validationDuckDBtitanicWARNING0.01ERROR0.02CRITICAL0.05,Example titanic dataset validationDuckDBtitanicWARNING0.01ERROR0.02CRITICAL0.05,Example titanic dataset validationDuckDBtitanicWARNING0.01ERROR0.02CRITICAL0.05
Unnamed: 0_level_2,Unnamed: 1_level_2,STEP,COLUMNS,VALUES,TBL,EVAL,UNITS,PASS,FAIL,W,E,C,EXT
#4CA64C,1,col_vals_not_null  col_vals_not_null()  Expect that all values in PassengerId should not be Null.,PassengerId,—,,✓,891,891 1.00,0 0.00,○,○,○,—
#4CA64C,2,col_exists  col_exists()  Expect that column Name exists.,Name,—,,✓,1,1 1.00,0 0.00,○,○,○,—
#4CA64C,3,col_exists  col_exists()  Expect that column Ticket exists.,Ticket,—,,✓,1,1 1.00,0 0.00,○,○,○,—
#4CA64C,4,col_vals_between  col_vals_between()  Expect that values in Pclass should be between 1 and 3.,Pclass,"[1, 3]",,✓,891,891 1.00,0 0.00,○,○,○,—
#4CA64C,5,"col_vals_in_set  col_vals_in_set()  Expect that values in Sex should be in the set of male, female.",Sex,"male, female",,✓,891,891 1.00,0 0.00,○,○,○,—
#EBBC14,6,col_vals_lt  col_vals_lt()  Expect that values in Age should be < 60.,Age,60,,✓,891,865 0.97,26 0.03,●,●,○,—
2025-07-06 22:43:50 UTC< 1 s2025-07-06 22:43:51 UTC,2025-07-06 22:43:50 UTC< 1 s2025-07-06 22:43:51 UTC,2025-07-06 22:43:50 UTC< 1 s2025-07-06 22:43:51 UTC,2025-07-06 22:43:50 UTC< 1 s2025-07-06 22:43:51 UTC,2025-07-06 22:43:50 UTC< 1 s2025-07-06 22:43:51 UTC,2025-07-06 22:43:50 UTC< 1 s2025-07-06 22:43:51 UTC,2025-07-06 22:43:50 UTC< 1 s2025-07-06 22:43:51 UTC,2025-07-06 22:43:50 UTC< 1 s2025-07-06 22:43:51 UTC,2025-07-06 22:43:50 UTC< 1 s2025-07-06 22:43:51 UTC,2025-07-06 22:43:50 UTC< 1 s2025-07-06 22:43:51 UTC,2025-07-06 22:43:50 UTC< 1 s2025-07-06 22:43:51 UTC,2025-07-06 22:43:50 UTC< 1 s2025-07-06 22:43:51 UTC,2025-07-06 22:43:50 UTC< 1 s2025-07-06 22:43:51 UTC,2025-07-06 22:43:50 UTC< 1 s2025-07-06 22:43:51 UTC


Close the connection:

In [8]:
# Close the DuckDB connection now that the example is finished.
con.disconnect()