In [None]:
%load_ext autoreload
%autoreload 2

In [546]:
def unit_test_rectangles():
    import numpy as np
    import matplotlib.pyplot as plt
    from groups import get_rectangles
    
    np.random.seed(123)

    X1 = np.random.choice([0,1], size=(3,)) 
    X2 = np.random.choice(np.arange(2), size=(3,))

    X = np.stack((X1, X2), axis=1)
    
    rects = get_rectangles(X)
    
    true_rects = np.asarray([
        [False, True, True], 
        [True, False, True], 
        [False, True, True]
    ])
    return (rects == true_rects).all()

def unit_test_intervals():
    # verify that rectangles score matches interval score

    from auditor import Auditor
    from metrics import Metric
    from groups import get_rectangles
    from scipy.stats import norm

    rng = np.random.default_rng(seed = 0)

    X = rng.uniform(0, 5, (100,1))
    Y = X + 0.1 * rng.standard_normal(size=(100,1))
    Z1 = X - norm.ppf(0.95, scale=0.1)
    Z2 = X + norm.ppf(0.95, scale=0.5)
    Z = np.hstack([Z1, Z2])

    metric = Metric("equalized_coverage")

    auditor = Auditor(X=X, Y=Y, Z=Z, metric=metric)
    auditor.calibrate_groups(
        alpha=0.05,
        type="upper",
        groups="intervals",
        epsilon=0.55
    )

    cert, metric = auditor.query_group(
        X <= 1
    )
    
    intervals = get_rectangles(X)

    auditor.calibrate_groups(
        alpha=0.05,
        type="upper",
        groups=intervals,
        epsilon=0.55
    )

    cert_bf, metric = auditor.query_group(
        X <= 1
    )
    return np.all(cert == cert_bf)

100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 500/500 [00:00<00:00, 13671.31it/s]
100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 500/500 [00:00<00:00, 1737.42it/s]


True

In [319]:
from itertools import combinations 

discretization = {1 : [2,3]}

for idx, disc in discretization.items():
    X[:,idx] = np.digitize(
        X[:,idx],
        disc
    )


feature_list = np.arange(X.shape[1])
depth = X.shape[1]
all_groups = []
group_indices = []
for n_intersect in range(1, depth + 1):
    for c in combinations(feature_list, n_intersect):
        unique_groups, indices = np.unique(X[:,c], return_inverse=True, axis=0)

        # track groups
        all_groups.extend([{c_ind : g_val for c_ind, g_val in zip(c, g)} for g in unique_groups])

        # generate dummies
        dummies = np.zeros((X.shape[0], len(unique_groups)), dtype=int)
        dummies[(range(X.shape[0]), indices)] = int(1)
        group_indices.append(dummies)

group_indices = np.concatenate(group_indices, axis=1, dtype=int)

In [358]:
arr = np.random.choice([0,1], size=(5,3)).astype(bool)
print(arr)

print(np.amax(arr, axis=1))


[[ True  True False]
 [ True False  True]
 [False False False]
 [False False  True]
 [ True False False]]
[ True  True False  True  True]


In [350]:
np.random.seed(0)
mat = np.random.uniform(size=(5,2)).astype(bool)
prod = np.einsum('ij,ik->kij', group_indices.astype(bool),mat)

print(prod)



[[[ True False  True  True False]
  [False  True  True False  True]
  [ True False  True  True False]
  [ True False  True  True False]
  [False  True  True False  True]]

 [[ True False  True  True False]
  [False  True  True False  True]
  [ True False  True  True False]
  [ True False  True  True False]
  [False  True  True False  True]]]


In [10]:
from certify import Certifier
from metrics import Metric
import pandas as pd

audit_trail = pd.read_csv("/Users/cherian/Projects/fairaudit/audit_trail.csv")


auditor = Certifier(audit_trail.x, audit_trail.y, audit_trail.z, Metric(name="statistical_parity"))




TypeError: Metric.__init__() got an unexpected keyword argument 'name'

In [232]:
import numpy as np

X = np.random.choice(np.arange(4), (500, 4))

n = X.shape[0]
p = X.shape[1]

coordinate_dummies = []
for c in range(p):
    unique_vals, indices = np.unique(X[:,c], return_inverse=True, axis=0)

    num_unique = len(unique_vals)
    # generate unique dummies
    unique_dummies = np.zeros((n, num_unique), dtype=int)
    unique_dummies[(range(n), indices)] = int(1)

    num_intervals = (num_unique * (num_unique + 1)) // 2
    interval_dummies = np.zeros((n, num_intervals), dtype=int)

    idx = num_unique
    add_dummies = np.cumsum(unique_dummies, axis=1, dtype=int)
    interval_dummies[:,0:idx] = add_dummies
    
    for c_prime in range(1, len(unique_vals)):
        # update dummies by subtracting out contribution from first unique dummy
        # and removing first column
        num_added = num_unique - c_prime
        add_dummies = add_dummies[:,1:num_unique] - add_dummies[:,0,None]
        interval_dummies[:,idx:(idx + num_added)] = add_dummies
        idx += num_added

    interval_dummies = interval_dummies.clip(max=int(1))
    coordinate_dummies.append(interval_dummies)

In [249]:
chars = [chr(idx + 97) for idx in range(p)]
einsum_str = ','.join(f'i{c}' for c in chars)
einsum_str += '->i' + ''.join(chars)
group_dummies = np.einsum(einsum_str, *coordinate_dummies, )
group_dummies = group_dummies.reshape(n, -1, order='C')

43.3 ms ± 1.68 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [253]:
def run_test(**kwargs):
    print(kwargs)
    
np.asarray([(1,2), (3, 3)])

array([[1, 2],
       [3, 3]])

In [229]:
np.all(group_dummies_unpacked == group_dummies)

True

In [197]:
from itertools import product

def run_trial():
    num_intervals = [range(interval_dummies.shape[1]) for interval_dummies in coordinate_dummies]
    group_indices = list(product(*num_intervals))

    num_groups = len(group_indices)

    group_dummies = np.zeros((n, num_groups), dtype=int)
    final_arr = np.ones((n, 1), dtype=int)

    for i, g_index in enumerate(group_indices):
        for j, g_i in enumerate(g_index):
            final_arr *= coordinate_dummies[j][:,g_i,None]
        group_dummies[:,i] = final_arr.flatten()
        final_arr = 1
    return group_dummies
        
group_dummies = run_trial()

In [198]:
group_dummies

array([[0, 1, 1, 0, 1, 1, 0, 0, 0, 0, 1, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 0, 0, 0, 0, 1, 1, 0,
        1, 1, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 0, 0, 0, 0, 1,
        1, 0, 1, 1, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 0, 0, 0, 0, 1, 1, 0, 1,
        1, 0, 0, 0, 0],
       [0, 0, 0, 0, 1, 1, 0, 1, 1, 0, 0, 0, 0, 1, 1, 0, 1, 1, 0, 0, 0, 0,
        0, 0, 0, 0, 0]])

In [182]:
test.reshape(3, -1)

array([[0, 1, 0, 1, 0, 0, 1, 1, 0, 1, 0, 1, 0, 0, 0, 0],
       [1, 0, 0, 0, 1, 0, 1, 1, 1, 0, 0, 0, 0, 1, 0, 1],
       [1, 0, 1, 0, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0]])