# ABAC - Row Filtering and Column Masking


In [0]:
%sql
-- Start from a clean slate: drop the demo schema together with its contents.
DROP SCHEMA IF EXISTS dkushari_uc.fgac_abac CASCADE;
-- Recreate the schema with an explicit managed storage location.
CREATE SCHEMA IF NOT EXISTS dkushari_uc.fgac_abac
  MANAGED LOCATION "s3://us-east-1-dev-account-staging-uc-ext-loc-bucket-1/dkushari/f_g_a_c/fgac_abac/";

In [0]:
%sql
-- Make the demo catalog and schema the session defaults so that later cells
-- can refer to tables, functions and policies by unqualified name.
USE CATALOG dkushari_uc;
USE fgac_abac;



In [0]:
%sql
-- Sanity check: show which catalog the session is using.
SELECT current_catalog();



In [0]:
%sql
-- Sanity check: show which schema the session is using.
SELECT current_schema();



In [0]:
%sql
-- Remove both demo tables so the notebook can be re-run from the top.
DROP TABLE IF EXISTS customer_pii_data_delta_1;
DROP TABLE IF EXISTS customer_pii_data_delta_2;



In [0]:
from pyspark.sql.functions import expr, lit, rand
from pyspark.sql.types import IntegerType

def _random_choice(values):
    """Return a Column that picks one of `values` uniformly at random per row.

    A single rand() draw is used as an index into a literal array. Chaining
    `CASE WHEN rand() < 0.2 ... WHEN rand() < 0.4 ...` evaluates a fresh
    random number in every branch, which skews the result heavily toward the
    first options instead of giving each value an equal share.

    `values` must be plain strings without single quotes, because they are
    embedded directly into a SQL array literal.
    """
    quoted = ", ".join(f"'{value}'" for value in values)
    # floor(rand() * n) is in [0, n - 1]; element_at is 1-based, hence the + 1.
    return expr(
        f"element_at(array({quoted}), cast(floor(rand() * {len(values)}) as int) + 1)"
    )


# Generate a DataFrame with 50 records
df = spark.range(50).withColumnRenamed("id", "record_id")

# Add PII information (dummy data for illustration)
df = (
    df.withColumn("first_name", _random_choice(["John", "Jane", "Doe", "Alice", "Bob"]))
    .withColumn("last_name", _random_choice(["Smith", "Johnson", "Williams", "Brown", "Jones"]))
    .withColumn(
        "date_of_birth",
        _random_choice(["1980-01-01", "1990-01-01", "2000-01-01", "1985-01-01", "1995-01-01"]),
    )
    # Age is derived from the current year rather than a hard-coded one, so the
    # generated ages do not go stale when the notebook is re-run later.
    .withColumn(
        "age",
        expr("year(current_date()) - cast(substring(date_of_birth, 1, 4) as int)").cast(IntegerType()),
    )
    .withColumn("gender", _random_choice(["M", "F"]))
    .withColumn(
        "address",
        _random_choice(["123 Main St", "456 Elm St", "789 Pine St", "101 Oak St", "202 Maple St"]),
    )
    .withColumn(
        "ssn",
        _random_choice(["111-11-1111", "222-22-2222", "333-33-3333", "444-44-4444", "555-55-5555"]),
    )
    .withColumn("region", _random_choice(["Northeast", "Midwest", "South", "West", "Southwest"]))
)

# Display the DataFrame
# display(df)

# Write the DataFrame to a Unity Catalog table. An explicit "path" option is
# supplied, so the Delta files are written to that S3 location.
df.write.format("delta").mode("overwrite").option("path","s3://us-east-1-dev-account-staging-uc-ext-loc-bucket-1/dkushari/f_g_a_c/fgac_abac/external-table/customer_pii_data_delta_1").saveAsTable("customer_pii_data_delta_1")



In [0]:
%sql
-- Preview a few of the generated rows.
SELECT * FROM customer_pii_data_delta_1 LIMIT 5;



In [0]:
%sql
-- Append a copy of the table's own rows (at most 100) to grow the sample data.
INSERT INTO customer_pii_data_delta_1
SELECT *
FROM customer_pii_data_delta_1
LIMIT 100;



In [0]:
%sql
-- Duplicate only the rows for one specific first/last name combination.
INSERT INTO customer_pii_data_delta_1
SELECT *
FROM customer_pii_data_delta_1
WHERE first_name = 'Doe'
  AND last_name = 'Williams';



In [0]:
%sql
-- Row count after the two self-inserts.
SELECT COUNT(*) FROM customer_pii_data_delta_1;



In [0]:
%sql
-- Second demo table: a deep clone of the first one.
CREATE TABLE IF NOT EXISTS customer_pii_data_delta_2
  DEEP CLONE customer_pii_data_delta_1;



## Customer Table with PII data

In [0]:
%sql
-- Customers per region in the first table.
SELECT
  region,
  COUNT(*) AS total_customers
FROM customer_pii_data_delta_1
GROUP BY region;



In [0]:
%sql
-- Customers per region in the cloned table.
SELECT
  region,
  COUNT(*) AS total_customers
FROM customer_pii_data_delta_2
GROUP BY region;



In [0]:
%sql
-- Re-select the demo catalog and schema as the session defaults.
USE CATALOG dkushari_uc;
USE fgac_abac;



# Create ABAC Row Level Filtering UC functions 

##### Add tags to the table columns

In [0]:
%sql
-- Tag the ssn column; the mask_ssn policy matches on this tag key/value pair.
ALTER TABLE customer_pii_data_delta_1
  ALTER COLUMN ssn SET TAGS ('sensitive_pii' = 'ssn');



In [0]:
%sql
-- Same ssn tag on the cloned table.
ALTER TABLE customer_pii_data_delta_2
  ALTER COLUMN ssn SET TAGS ('sensitive_pii' = 'ssn');



In [0]:
%sql
-- Tag the address column; the mask_address policy matches on this tag key/value pair.
ALTER TABLE customer_pii_data_delta_1
  ALTER COLUMN address SET TAGS ('sensitive_pii' = 'address');



In [0]:
%sql
-- Same address tag on the cloned table.
ALTER TABLE customer_pii_data_delta_2
  ALTER COLUMN address SET TAGS ('sensitive_pii' = 'address');



In [0]:
%sql
-- Key-only tag on region; the row filter policy matches on hasTag('geo_region').
ALTER TABLE customer_pii_data_delta_1
  ALTER COLUMN region SET TAGS ('geo_region');



In [0]:
%sql
-- Same key-only region tag on the cloned table.
ALTER TABLE customer_pii_data_delta_2
  ALTER COLUMN region SET TAGS ('geo_region');



##### Define the row filter function

In [0]:
%sql
-- Row filter function used by the ABAC row filter policies in this notebook.
--   column_1        : value of the filtered column for the current row
--   columne_value_1 : the value whose rows should be hidden
-- Returns TRUE when the row should stay visible, i.e. when column_1 differs
-- from columne_value_1.
-- IS DISTINCT FROM is the NULL-safe form of <>: with a plain <>, a NULL
-- column_1 makes the result NULL rather than TRUE, so rows with no value in
-- the filtered column would be hidden together with the targeted value.
DROP FUNCTION IF EXISTS abac_row_filter;
CREATE FUNCTION abac_row_filter (column_1 STRING, columne_value_1 STRING)
RETURNS BOOLEAN
  RETURN column_1 IS DISTINCT FROM columne_value_1;




##### Define the Policy at the Schema level using the row filter function for group_1

In [0]:
%sql
-- Remove the row filter policy left over from a previous run.
-- NOTE(review): there is no IF EXISTS here, so this presumably errors when the
-- policy does not exist yet (e.g. right after the schema was recreated) - confirm.
DROP POLICY hide_west_region_customers ON SCHEMA dkushari_uc.fgac_abac;



# Policy to Hide customers from West region for sensitive tables

In [0]:
%sql
-- Schema-level row filter policy for members of demo_fgac_group_1.
-- On every table that has a column tagged 'geo_region', that column (aliased
-- as region) and the literal 'West' are passed to abac_row_filter.
CREATE OR REPLACE POLICY hide_west_region_customers
  ON SCHEMA dkushari_uc.fgac_abac
  COMMENT 'Hide customers from West region for sensitive tables'
  ROW FILTER abac_row_filter
  TO demo_fgac_group_1
  FOR TABLES
  MATCH COLUMNS hasTag('geo_region') AS region
  USING COLUMNS (region, 'West');




##### The user is a member of group_1, so queries return only the rows that pass the row filter function (West-region rows are hidden)

In [0]:
%sql
-- Show which of the two demo groups the current user belongs to.
SELECT
  is_account_group_member('demo_fgac_group_1') AS group_1,
  is_account_group_member('demo_fgac_group_2') AS group_2;



In [0]:
%sql
-- Look for West-region rows in the first table (the rows the policy is meant to hide).
SELECT *
FROM customer_pii_data_delta_1
WHERE region = 'West'
LIMIT 3;



In [0]:
%sql
-- Same check against the cloned table.
SELECT *
FROM customer_pii_data_delta_2
WHERE region = 'West'
LIMIT 3;



In [0]:
%sql
-- Unfiltered preview of the first table.
SELECT * FROM customer_pii_data_delta_1 LIMIT 5;



In [0]:
%sql
-- Rows from a region the policy does not target.
SELECT *
FROM customer_pii_data_delta_2
WHERE region = 'Midwest'
LIMIT 3;



# Create ABAC column masking UC functions 

In [0]:
%sql
-- Column mask function: ignores the incoming value and always returns the
-- same fixed placeholder string.
DROP FUNCTION IF EXISTS abac_col_mask;
CREATE OR REPLACE FUNCTION abac_col_mask (column_1 STRING)
  RETURN '❌❌❌❌❌';




In [0]:
%sql
-- Remove both column mask policies left over from a previous run.
-- NOTE(review): no IF EXISTS is used, so these presumably error when the
-- policies do not exist yet - confirm.
DROP POLICY mask_ssn ON SCHEMA dkushari_uc.fgac_abac;
DROP POLICY mask_address ON SCHEMA dkushari_uc.fgac_abac;



In [0]:
%sql
-- Mask columns tagged sensitive_pii = ssn for demo_fgac_group_1,
-- except for members of demo_fgac_group_2.
CREATE OR REPLACE POLICY mask_ssn
  ON SCHEMA dkushari_uc.fgac_abac
  COMMENT 'mask ssn value'
  COLUMN MASK abac_col_mask
  TO demo_fgac_group_1
  EXCEPT demo_fgac_group_2
  FOR TABLES
  MATCH COLUMNS hasTagValue('sensitive_pii','ssn') AS ssn
  ON COLUMN ssn;




In [0]:
%sql
-- Mask columns tagged sensitive_pii = address for demo_fgac_group_1,
-- except for members of demo_fgac_group_2.
CREATE OR REPLACE POLICY mask_address
  ON SCHEMA dkushari_uc.fgac_abac
  COMMENT 'mask the address'
  COLUMN MASK abac_col_mask
  TO demo_fgac_group_1
  EXCEPT demo_fgac_group_2
  FOR TABLES
  MATCH COLUMNS hasTagValue('sensitive_pii','address') AS address
  ON COLUMN address;




##### User is part of group_1, hence the ssn and address data will be masked

In [0]:
%sql
-- Show which of the two demo groups the current user belongs to.
SELECT
  is_account_group_member('demo_fgac_group_1') AS group_1,
  is_account_group_member('demo_fgac_group_2') AS group_2;



In [0]:
%sql
-- Preview the first table to inspect the ssn and address columns.
SELECT * FROM customer_pii_data_delta_1 LIMIT 5;



In [0]:
%sql
-- Preview the cloned table to inspect the ssn and address columns.
SELECT * FROM customer_pii_data_delta_2 LIMIT 5;



In [0]:
%sql
-- Look for West-region rows in the cloned table.
SELECT *
FROM customer_pii_data_delta_2
WHERE region = 'West'
LIMIT 5;



##### User is now part of both group_1 and group_2, so ssn and address data will not be masked, since **_EXCEPT_** takes precedence

In [0]:
%sql
-- Re-check group membership after the user was added to demo_fgac_group_2.
SELECT
  is_account_group_member('demo_fgac_group_1') AS group_1,
  is_account_group_member('demo_fgac_group_2') AS group_2;



In [0]:
%sql
-- Preview the first table again to compare the ssn and address columns.
SELECT * FROM customer_pii_data_delta_1 LIMIT 5;



In [0]:
%sql
-- Preview the cloned table again to compare the ssn and address columns.
SELECT * FROM customer_pii_data_delta_2 LIMIT 5;



In [0]:
%sql
-- Look for West-region rows in the cloned table.
SELECT *
FROM customer_pii_data_delta_2
WHERE region = 'West'
LIMIT 5;



##### Now let's update the ABAC row filter policy to include an EXCEPT clause

In [0]:
%sql
-- Replace the row filter policy with a variant that exempts members of
-- demo_fgac_group_2; everything else matches the earlier definition.
CREATE OR REPLACE POLICY hide_west_region_customers
  ON SCHEMA dkushari_uc.fgac_abac
  COMMENT 'Hide customers from West region for sensitive tables'
  ROW FILTER abac_row_filter
  TO demo_fgac_group_1
  EXCEPT demo_fgac_group_2
  FOR TABLES
  MATCH COLUMNS hasTag('geo_region') AS region
  USING COLUMNS (region, 'West');




-- Original Policy with no Except Clause
-- %sql
-- CREATE or REPLACE POLICY hide_west_region_customers 
-- ON SCHEMA dkushari_uc.fgac_abac
-- COMMENT 'Hide customers from West region for sensitive tables'
-- ROW FILTER abac_row_filter
-- TO demo_fgac_group_1
-- FOR TABLES
-- MATCH COLUMNS 
--   hasTag('geo_region') AS region  
-- USING COLUMNS(region, 'West');



In [0]:
%sql
-- Look for West-region rows now that the row filter policy has an EXCEPT clause.
SELECT *
FROM customer_pii_data_delta_2
WHERE region = 'West'
LIMIT 5;



In [0]:
%sql
-- List the Delta versions of the first table.
DESCRIBE HISTORY customer_pii_data_delta_1;



In [0]:
%sql
-- Time travel: read version 1 of the first table.
SELECT * FROM customer_pii_data_delta_1@v1 LIMIT 5;



##### Removing the EXCEPT clause

In [0]:
%sql
-- Restore the row filter policy without the EXCEPT clause, so it applies to
-- demo_fgac_group_1 with no exempted group.
CREATE OR REPLACE POLICY hide_west_region_customers
  ON SCHEMA dkushari_uc.fgac_abac
  COMMENT 'Hide customers from West region for sensitive tables'
  ROW FILTER abac_row_filter
  TO demo_fgac_group_1
  FOR TABLES
  MATCH COLUMNS hasTag('geo_region') AS region
  USING COLUMNS (region, 'West');



In [0]:
%sql
-- Repeat the version 1 time-travel read with the policy back in its original form.
SELECT * FROM customer_pii_data_delta_1@v1 LIMIT 5;



In [0]:
%sql
-- Conditional column mask.
--   column_1       : the value to protect
--   column_2       : a second column from the same row
--   column_2_value : the value of column_2 for which column_1 is shown as-is
-- Returns column_1 unchanged when column_2 equals column_2_value; in every
-- other case (including a NULL comparison) returns the fixed placeholder.
DROP FUNCTION IF EXISTS abac_col_mask_complex;
CREATE OR REPLACE FUNCTION abac_col_mask_complex (column_1 STRING, column_2 STRING, column_2_value STRING)
RETURN
  CASE
    WHEN column_2 = column_2_value THEN column_1
    ELSE '❌❌❌❌❌'
  END;



In [0]:
%sql
-- Remove the existing mask_ssn policy before it is redefined.
DROP POLICY mask_ssn ON SCHEMA dkushari_uc.fgac_abac;



In [0]:
%sql
-- The policy below matches only tables that have a column tagged 'gender'.
-- No earlier cell applies that tag, so without these statements the policy
-- would match no table. The tag is key-only, in the same form the notebook
-- uses for 'geo_region' on the region column.
ALTER TABLE customer_pii_data_delta_1
  ALTER COLUMN gender SET TAGS ('gender');
ALTER TABLE customer_pii_data_delta_2
  ALTER COLUMN gender SET TAGS ('gender');

-- Conditional ssn mask for demo_fgac_group_1: the ssn column, the gender
-- column and the literal 'M' are passed to abac_col_mask_complex, which
-- returns the real ssn only when gender equals 'M' and the placeholder otherwise.
CREATE OR REPLACE POLICY mask_ssn
  ON SCHEMA dkushari_uc.fgac_abac
  COMMENT 'mask ssn value'
  COLUMN MASK abac_col_mask_complex
  TO demo_fgac_group_1
  FOR TABLES
  MATCH COLUMNS
    hasTagValue('sensitive_pii','ssn') AS ssn,
    hasTag('gender') AS gender
  ON COLUMN ssn
  USING COLUMNS (gender, 'M');




In [0]:
%sql
-- Preview the first table with the conditional ssn mask in place.
SELECT * FROM customer_pii_data_delta_1 LIMIT 10;



In [0]:
%sql
-- Redefine mask_ssn with an empty comparison value. The generated gender
-- values are 'M' or 'F', so the equality inside abac_col_mask_complex never
-- holds and the placeholder is returned for every row.
CREATE OR REPLACE POLICY mask_ssn
  ON SCHEMA dkushari_uc.fgac_abac
  COMMENT 'mask ssn value'
  COLUMN MASK abac_col_mask_complex
  TO demo_fgac_group_1
  FOR TABLES
  MATCH COLUMNS
    hasTagValue('sensitive_pii','ssn') AS ssn,
    hasTag('gender') AS gender
  ON COLUMN ssn
  USING COLUMNS (gender, '');



In [0]:
%sql
-- Preview the first table again after the policy change.
SELECT * FROM customer_pii_data_delta_1 LIMIT 10;



In [0]:
%sql
-- Work inside the demo catalog and schema.
USE CATALOG dkushari_uc;

USE fgac_abac;

-- Recreate the Iceberg demo table from scratch.
DROP TABLE IF EXISTS customers_iceberg;

-- Empty Iceberg table with customer columns (c_custkey ... c_comment);
-- a later cell loads it from samples.tpch.customer.
CREATE TABLE IF NOT EXISTS customers_iceberg (
  c_custkey    BIGINT,
  c_name       STRING,
  c_address    STRING,
  c_nationkey  BIGINT,
  c_phone      STRING,
  c_acctbal    DECIMAL(18, 2),
  c_mktsegment STRING,
  c_comment    STRING
)
USING iceberg;



In [0]:
%sql
-- Show the detailed table metadata for the Iceberg table.
DESCRIBE EXTENDED customers_iceberg;




In [0]:
%sql
-- Read the full contents of the Iceberg table.
SELECT * FROM customers_iceberg;



In [0]:
%sql
-- Load the sample customers into the Iceberg table. Columns are listed
-- explicitly on both sides so the load maps by name and does not silently
-- depend on the column order of either table.
INSERT INTO customers_iceberg (
  c_custkey,
  c_name,
  c_address,
  c_nationkey,
  c_phone,
  c_acctbal,
  c_mktsegment,
  c_comment
)
SELECT
  c_custkey,
  c_name,
  c_address,
  c_nationkey,
  c_phone,
  c_acctbal,
  c_mktsegment,
  c_comment
FROM samples.tpch.customer;



In [0]:
%sql
-- Show the DDL of the sample source table.
SHOW CREATE TABLE samples.tpch.customer;



In [0]:
%sql
-- Preview a few rows of the sample source table.
SELECT * FROM samples.tpch.customer LIMIT 10;

