In [19]:
import pointblank as pb
import pandas as pd
import os

In [20]:
# Load the drugs table from the CSV file that sits in the sibling data folder.
drugs_csv_path = "../data/drugs.csv"
df = pd.read_csv(drugs_csv_path)

In [21]:
# Show the column names of the loaded frame, followed by how many there are.
column_names = list(df.columns)
print(column_names)
print(len(column_names))

['STATE', 'STATENAME', 'ST_CASE', 'VEH_NO', 'PER_NO', 'DRUGSPEC', 'DRUGSPECNAME', 'DRUGMETHOD', 'DRUGMETHODNAME', 'DRUGRES', 'DRUGRESNAME', 'DRUGQTY', 'DRUGQTYNAME', 'DRUGACTQTY', 'DRUGACTQTYNAME', 'DRUGUOM', 'DRUGUOMNAME']
17


In [22]:
# Expected layout of the frame that gets validated: the 17 columns read from
# the CSV followed by the helper flag column `DRUGMETHOD_valid` created below.
schema_columns = [
    ("STATE", "int64"),
    ("STATENAME", "object"),
    ("ST_CASE", "int64"),
    ("VEH_NO", "int64"),
    ("PER_NO", "int64"),
    ("DRUGSPEC", "int64"),
    ("DRUGSPECNAME", "object"),
    ("DRUGMETHOD", "int64"),
    ("DRUGMETHODNAME", "object"),
    ("DRUGRES", "int64"),
    ("DRUGRESNAME", "object"),
    ("DRUGQTY", "int64"),
    ("DRUGQTYNAME", "object"),
    ("DRUGACTQTY", "float64"),
    ("DRUGACTQTYNAME", "object"),
    ("DRUGUOM", "int64"),
    ("DRUGUOMNAME", "object"),
    ("DRUGMETHOD_valid", "int64"),
]
expected_schema = pb.Schema(columns=schema_columns)

# DRUGMETHOD rule
# ---------------
# A DRUGMETHOD value is acceptable when it is EITHER
#   * between 1 and 26 (both ends included), OR
#   * one of the special codes 0, 96, 97, 99.
#
# Expressing this as two separate pointblank steps, e.g.
#   .col_vals_between(columns="DRUGMETHOD", left=1, right=26)
#   .col_vals_in_set(columns="DRUGMETHOD", set=[0, 96, 97, 99])
# makes each step fail on the rows that only the other rule accepts.
#
# Instead, both conditions are OR-ed into one per-row flag stored in a new
# column, and the pipeline checks that the flag equals 1 on every row.
# The flag is cast from bool to int (True -> 1, False -> 0); the original
# author's note says pointblank could not validate boolean columns directly
# at the time of writing (NOTE(review): not re-verified here).
drugmethod = df["DRUGMETHOD"]
is_special_code = drugmethod.isin([0, 96, 97, 99])
is_numbered_method = drugmethod.between(1, 26)  # inclusive on both ends
df["DRUGMETHOD_valid"] = (is_special_code | is_numbered_method).astype(int)

# Allowed code sets for the set-membership steps.
drugspec_codes = [0, 1, 2, 11, 12, 13, 14, 15, 96, 97, 98, 99]
drugqty_codes = [0, 1, 2, 3, 4, 96, 97, 98, 99]
druguom_codes = [1, 2, 3, 4, 5, 6, 7, 8, -9]

# Columns that must not contain missing values (one validation step each).
required_columns = ["PER_NO", "VEH_NO", "STATENAME"]

# NOTE(review): warning/error look like absolute failure counts while critical
# looks like a fraction of test units -- confirm against the pointblank
# Thresholds documentation.
report_thresholds = pb.Thresholds(warning=1, error=20, critical=0.10)

validation_report = (
    pb.Validate(
        df,
        label="Validate Drugs Data Report",
        thresholds=report_thresholds,
    )
    # Step 1: STATE within [1, 56]
    .col_vals_between(columns="STATE", left=1, right=56)
    # Step 2: STATENAME made of letters and spaces only
    .col_vals_regex(columns="STATENAME", pattern=r"^[A-Za-z ]+$")
    # Step 3: VEH_NO within [0, 99]
    .col_vals_between(columns="VEH_NO", left=0, right=99)
    # Step 4: PER_NO within [1, 99]
    .col_vals_between(columns="PER_NO", left=1, right=99)
    # Step 5: DRUGSPEC restricted to its code list
    .col_vals_in_set(columns="DRUGSPEC", set=drugspec_codes)
    # Step 6: combined DRUGMETHOD rule (helper flag must be 1)
    .col_vals_eq(columns="DRUGMETHOD_valid", value=1)
    # Step 7: DRUGQTY restricted to its code list
    .col_vals_in_set(columns="DRUGQTY", set=drugqty_codes)
    # Step 8: DRUGUOM restricted to its code list
    .col_vals_in_set(columns="DRUGUOM", set=druguom_codes)
    # Steps 9, 10, 11: no nulls in each of the three required columns
    .col_vals_not_null(columns=required_columns)
    # Step 12: 17 original columns plus the 1 helper column
    .col_count_match(18)
    # Step 13: column names and dtypes match the expected schema
    .col_schema_match(schema=expected_schema)
    .interrogate()
)

# Leave the report as the cell's last expression so it is rendered.
validation_report

Pointblank Validation,Pointblank Validation,Pointblank Validation,Pointblank Validation,Pointblank Validation,Pointblank Validation,Pointblank Validation,Pointblank Validation,Pointblank Validation,Pointblank Validation,Pointblank Validation,Pointblank Validation,Pointblank Validation,Pointblank Validation
Validate Drugs Data ReportPandasWARNING1ERROR20CRITICAL0.1,Validate Drugs Data ReportPandasWARNING1ERROR20CRITICAL0.1,Validate Drugs Data ReportPandasWARNING1ERROR20CRITICAL0.1,Validate Drugs Data ReportPandasWARNING1ERROR20CRITICAL0.1,Validate Drugs Data ReportPandasWARNING1ERROR20CRITICAL0.1,Validate Drugs Data ReportPandasWARNING1ERROR20CRITICAL0.1,Validate Drugs Data ReportPandasWARNING1ERROR20CRITICAL0.1,Validate Drugs Data ReportPandasWARNING1ERROR20CRITICAL0.1,Validate Drugs Data ReportPandasWARNING1ERROR20CRITICAL0.1,Validate Drugs Data ReportPandasWARNING1ERROR20CRITICAL0.1,Validate Drugs Data ReportPandasWARNING1ERROR20CRITICAL0.1,Validate Drugs Data ReportPandasWARNING1ERROR20CRITICAL0.1,Validate Drugs Data ReportPandasWARNING1ERROR20CRITICAL0.1,Validate Drugs Data ReportPandasWARNING1ERROR20CRITICAL0.1
Unnamed: 0_level_2,Unnamed: 1_level_2,STEP,COLUMNS,VALUES,TBL,EVAL,UNITS,PASS,FAIL,W,E,C,EXT
#4CA64C,1,col_vals_between  col_vals_between(),STATE,"[1, 56]",,✓,130K,130K 1.00,0 0.00,○,○,○,—
#4CA64C,2,col_vals_regex  col_vals_regex(),STATENAME,^[A-Za-z ]+$,,✓,130K,130K 1.00,0 0.00,○,○,○,—
#4CA64C,3,col_vals_between  col_vals_between(),VEH_NO,"[0, 99]",,✓,130K,130K 1.00,0 0.00,○,○,○,—
#4CA64C,4,col_vals_between  col_vals_between(),PER_NO,"[1, 99]",,✓,130K,130K 1.00,0 0.00,○,○,○,—
#4CA64C,5,col_vals_in_set  col_vals_in_set(),DRUGSPEC,"0, 1, 2, 11, 12, 13, 14, 15, 96, 97, 98, 99",,✓,130K,130K 1.00,0 0.00,○,○,○,—
#4CA64C,6,col_vals_eq  col_vals_eq(),DRUGMETHOD_valid,1,,✓,130K,130K 1.00,0 0.00,○,○,○,—
#4CA64C,7,col_vals_in_set  col_vals_in_set(),DRUGQTY,"0, 1, 2, 3, 4, 96, 97, 98, 99",,✓,130K,130K 1.00,0 0.00,○,○,○,—
#4CA64C,8,col_vals_in_set  col_vals_in_set(),DRUGUOM,"1, 2, 3, 4, 5, 6, 7, 8, -9",,✓,130K,130K 1.00,0 0.00,○,○,○,—
#4CA64C,9,col_vals_not_null  col_vals_not_null(),PER_NO,—,,✓,130K,130K 1.00,0 0.00,○,○,○,—
#4CA64C,10,col_vals_not_null  col_vals_not_null(),VEH_NO,—,,✓,130K,130K 1.00,0 0.00,○,○,○,—


In [23]:

# Save the tabular validation report as a standalone HTML file,
# creating the output folder first if it does not exist yet.
result_folder = "../validation_result"
os.makedirs(result_folder, exist_ok=True)

report_html_path = f"{result_folder}/drugs_data_validation_report.html"
tabular_report = validation_report.get_tabular_report()
tabular_report.write_raw_html(report_html_path)


In [24]:
# `i` is the 1-based position of a validation step in the pb.Validate()
# pipeline; position 3 is the VEH_NO range check.
step_index = 3
validation_report.get_step_report(i=step_index)

Report for Validation Step 3 ✓ASSERTION 0 ≤ VEH_NO ≤ 99129629 TEST UNITS ALL PASSED IN COLUMN 4PREVIEW OF TARGET TABLE:,Report for Validation Step 3 ✓ASSERTION 0 ≤ VEH_NO ≤ 99129629 TEST UNITS ALL PASSED IN COLUMN 4PREVIEW OF TARGET TABLE:,Report for Validation Step 3 ✓ASSERTION 0 ≤ VEH_NO ≤ 99129629 TEST UNITS ALL PASSED IN COLUMN 4PREVIEW OF TARGET TABLE:,Report for Validation Step 3 ✓ASSERTION 0 ≤ VEH_NO ≤ 99129629 TEST UNITS ALL PASSED IN COLUMN 4PREVIEW OF TARGET TABLE:,Report for Validation Step 3 ✓ASSERTION 0 ≤ VEH_NO ≤ 99129629 TEST UNITS ALL PASSED IN COLUMN 4PREVIEW OF TARGET TABLE:,Report for Validation Step 3 ✓ASSERTION 0 ≤ VEH_NO ≤ 99129629 TEST UNITS ALL PASSED IN COLUMN 4PREVIEW OF TARGET TABLE:,Report for Validation Step 3 ✓ASSERTION 0 ≤ VEH_NO ≤ 99129629 TEST UNITS ALL PASSED IN COLUMN 4PREVIEW OF TARGET TABLE:,Report for Validation Step 3 ✓ASSERTION 0 ≤ VEH_NO ≤ 99129629 TEST UNITS ALL PASSED IN COLUMN 4PREVIEW OF TARGET TABLE:,Report for Validation Step 3 ✓ASSERTION 0 ≤ VEH_NO ≤ 99129629 TEST UNITS ALL PASSED IN COLUMN 4PREVIEW OF TARGET TABLE:,Report for Validation Step 3 ✓ASSERTION 0 ≤ VEH_NO ≤ 99129629 TEST UNITS ALL PASSED IN COLUMN 4PREVIEW OF TARGET TABLE:,Report for Validation Step 3 ✓ASSERTION 0 ≤ VEH_NO ≤ 99129629 TEST UNITS ALL PASSED IN COLUMN 4PREVIEW OF TARGET TABLE:,Report for Validation Step 3 ✓ASSERTION 0 ≤ VEH_NO ≤ 99129629 TEST UNITS ALL PASSED IN COLUMN 4PREVIEW OF TARGET TABLE:,Report for Validation Step 3 ✓ASSERTION 0 ≤ VEH_NO ≤ 99129629 TEST UNITS ALL PASSED IN COLUMN 4PREVIEW OF TARGET TABLE:,Report for Validation Step 3 ✓ASSERTION 0 ≤ VEH_NO ≤ 99129629 TEST UNITS ALL PASSED IN COLUMN 4PREVIEW OF TARGET TABLE:,Report for Validation Step 3 ✓ASSERTION 0 ≤ VEH_NO ≤ 99129629 TEST UNITS ALL PASSED IN COLUMN 4PREVIEW OF TARGET TABLE:,Report for Validation Step 3 ✓ASSERTION 0 ≤ VEH_NO ≤ 99129629 TEST UNITS ALL PASSED IN COLUMN 4PREVIEW OF TARGET TABLE:,Report for Validation Step 3 ✓ASSERTION 0 ≤ VEH_NO ≤ 99129629 TEST UNITS ALL 
PASSED IN COLUMN 4PREVIEW OF TARGET TABLE:,Report for Validation Step 3 ✓ASSERTION 0 ≤ VEH_NO ≤ 99129629 TEST UNITS ALL PASSED IN COLUMN 4PREVIEW OF TARGET TABLE:,Report for Validation Step 3 ✓ASSERTION 0 ≤ VEH_NO ≤ 99129629 TEST UNITS ALL PASSED IN COLUMN 4PREVIEW OF TARGET TABLE:
Unnamed: 0_level_1,STATEint64,STATENAMEobject,ST_CASEint64,VEH_NOint64,PER_NOint64,DRUGSPECint64,DRUGSPECNAMEobject,DRUGMETHODint64,DRUGMETHODNAMEobject,DRUGRESint64,DRUGRESNAMEobject,DRUGQTYint64,DRUGQTYNAMEobject,DRUGACTQTYfloat64,DRUGACTQTYNAMEobject,DRUGUOMint64,DRUGUOMNAMEobject,DRUGMETHOD_validint64
1,1,Alabama,10001,1,1,0,Test Not Given,0,Test Not Given,0,Test Not Given,0,Test Not Given,-99.0,Not Applicable,-9,Not Applicable,1
2,1,Alabama,10002,1,1,1,Whole Blood,19,Unknown Confirmatory Test Method,3033,Cocaine,2,Actual Drug Quantity,20.0,20.000,5,ng/mL,1
3,1,Alabama,10002,1,1,1,Whole Blood,19,Unknown Confirmatory Test Method,3022,Benzoylecgonine,2,Actual Drug Quantity,39.0,39.000,5,ng/mL,1
4,1,Alabama,10002,1,1,1,Whole Blood,19,Unknown Confirmatory Test Method,3053,Methamphetamine,2,Actual Drug Quantity,200.0,200.000,5,ng/mL,1
5,1,Alabama,10002,1,1,1,Whole Blood,19,Unknown Confirmatory Test Method,5060,Delta 9-tetrahydrocannabinol [THC],2,Actual Drug Quantity,7.9,7.900,5,ng/mL,1
129625,56,Wyoming,560120,1,4,0,Test Not Given,0,Test Not Given,0,Test Not Given,0,Test Not Given,-99.0,Not Applicable,-9,Not Applicable,1
129626,56,Wyoming,560120,1,5,0,Test Not Given,0,Test Not Given,0,Test Not Given,0,Test Not Given,-99.0,Not Applicable,-9,Not Applicable,1
129627,56,Wyoming,560120,2,1,0,Test Not Given,0,Test Not Given,0,Test Not Given,0,Test Not Given,-99.0,Not Applicable,-9,Not Applicable,1
129628,56,Wyoming,560121,1,1,1,Whole Blood,9,Unknown Screening Test Method,3026,Caffeine,3,Presumptive Positive,-99.0,Not Applicable,-9,Not Applicable,1
129629,56,Wyoming,560121,1,1,1,Whole Blood,9,Unknown Screening Test Method,3034,Cotinine,3,Presumptive Positive,-99.0,Not Applicable,-9,Not Applicable,1
