In [5]:
%cd /home/giorgian/Documente/Cercetare/Fairness/nn_fairness

/home/giorgian/Documente/Cercetare/Fairness/nn_fairness


In [6]:
import pickle
from polytope import Polytope
import numpy as np
from polytope import is_fulldim
import quad
import json
from prob import ProbabilityDensityComputer
import math
from scipy.spatial import ConvexHull

In [7]:
import numpy as np
from polytope.polytope import reduce, cheby_ball, Region, ABS_TOL
from quickhull import quickhull

def extreme(poly1):
    """Compute the extreme points of a _bounded_ polytope
    @param poly1: Polytope in dimension d
    @return: A (N x d) numpy array containing the N vertices of poly1
    """
    if poly1.vertices is not None:
        # In case vertices already stored
        return poly1.vertices
    V = np.array([])
    R = np.array([])
    if isinstance(poly1, Region):
        raise Exception("extreme: not executable for regions")
    # `poly1` is a `Polytope`
    # TODO: change so that we assume that poly1 is already reduced
    poly1 = reduce(poly1)  # Need to have polytope non-redundant!
    if not is_fulldim(poly1):
        return None
    # `poly1` isn't flat
    A = poly1.A.copy()
    b = poly1.b.copy()
    sh = np.shape(A)
    nc = sh[0]
    nx = sh[1]
    # distinguish cases by dimension
    if nx == 1:
        # Polytope is a 1-dim line
        for ii in range(nc):
            V = np.append(V, b[ii] / A[ii])
        if len(A) == 1:
            R = np.append(R, 1)
            raise Exception("extreme: polytope is unbounded")
    elif nx == 2:
        # Polytope is 2D
        alf = np.angle(A[:, 0] + 1j * A[:, 1])
        I = np.argsort(alf)
        H = np.vstack([A, A[0, :]])
        K = np.hstack([b, b[0]])
        I = np.hstack([I, I[0]])
        for ii in range(nc):
            HH = np.vstack([H[I[ii], :], H[I[ii + 1], :]])
            KK = np.hstack([K[I[ii]], K[I[ii + 1]]])
            if np.linalg.cond(HH) == np.inf:
                R = np.append(R, 1)
                raise Exception("extreme: polytope is unbounded")
            else:
                try:
                    v = np.linalg.solve(HH, KK)
                except Exception:
                    msg = 'Finding extreme points failed, '
                    msg += 'Check if any unbounded Polytope '
                    msg += 'is causing this.'
                    raise Exception(msg)
                if len(V) == 0:
                    V = np.append(V, v)
                else:
                    V = np.vstack([V, v])
    else:
        # General nD method,
        # solve a vertex enumeration problem for
        # the dual polytope
        rmid, xmid = cheby_ball(poly1)
        A = poly1.A.copy()
        b = poly1.b.copy()
        sh = np.shape(A)
        Ai = np.zeros(sh)
        for ii in range(sh[0]):
            Ai[ii, :] = A[ii, :] / (b[ii] - np.dot(A[ii, :], xmid))
        #print(f"{Ai=}")
        p, active = qhull(Ai)
        #print(f'{p.A.shape=}')
        Q = reduce(p)
        #print(f"{Q.vertices=}")
        #print(f"{Q.A.shape=}")
        #print(f"{Q.b=}")
        if not is_fulldim(Q):
            return None
        # `Q` isn't flat
        H = Q.A
        K = Q.b
        sh = np.shape(H)
        nx = sh[1]
        V = np.zeros(sh)
        for iv in range(sh[0]):
            for ix in range(nx):
                V[iv, ix] = H[iv, ix] / K[iv] + xmid[ix]
    a = V.size / nx
    if not a.is_integer():
        raise AssertionError(a)
    a = int(a)
    poly1.vertices = V.reshape((a, nx))
    return poly1.vertices, active

def qhull(vertices, abs_tol=ABS_TOL):
    """Use quickhull to compute a convex hull.
    @param vertices: A N x d array containing N vertices in dimension d
    @return: L{Polytope} describing the convex hull
    """
    A, b, vert, active = quickhull(vertices, abs_tol=abs_tol)
    if A.size == 0:
        return Polytope(), None
    return Polytope(A, b, minrep=True, vertices=vert), active

In [8]:
def compute_volume(A, b, fixed_indices):
    #keep_indices = [i for i in range(A.shape[1]) if i not in fixed_indices]
    #b -= A[:, fixed_indices] @ b[fixed_indices]
    from polytope.polytope import reduce
    #A 
    N = A.shape[1]
    M = A.shape[0]
    Ac = np.concatenate((A, -np.identity(N)), axis=0)
    bc = np.concatenate((b, np.zeros(N)))
    
    # First remove redundant constraints
    poly = reduce(Polytope(Ac, bc))
    vertices, active_constraints = extreme(poly)
    A = poly.A
    b = poly.b
    N = A.shape[1]
    M = A.shape[0]
    
    slack = np.identity(M)
    last_row = np.zeros((1, A.shape[1] + M + 1))
    tableu = np.concatenate((A[:M], slack, b.reshape(b.shape[0], 1)[:M]), axis=-1)
    tableu = np.concatenate((tableu, last_row), axis=0)
    tableu[-1, :N] = -1

    volume = 0
    for p, active_indices in zip(vertices, active_constraints):
        #print(active_indices)
        #active = np.isclose((A @ p)[:M], b[:M])
        active = np.zeros(M, dtype=bool)
        active[active_indices] = 1
        #print(p, np.sort(active_indices), np.where(active)[0].tolist())
        
        # Find the basic and non-basic variables
        basic_indices = np.arange(M)
        basic_indices[np.logical_not(active)] += N # These are the basic slack variables
        #basic_indices[active] = np.arange(N)[np.logical_not(np.isclose(p, 0))] # These are the basic non-slack variables
        basic_indices[active] = np.arange(N)
        basic_indices = np.sort(basic_indices)
        possible_indices = np.arange(M + N)
        unused = np.ones(M + N).astype(np.bool_)
        unused[basic_indices] = False

        non_basic_indices = possible_indices[unused]
        #print(np.sort(active_indices), non_basic_indices)

        δ_v = abs(np.linalg.det(tableu[:M, basic_indices]))

        # So now it's for the constraints that are active that we need to figure out
        # Which is a NB variable
        c = tableu[-1, :M+N]
        c_N = c[non_basic_indices]
        c_B = c[basic_indices]
        # Matrix for basic  variables

        A_B = tableu[:M, basic_indices]

        # Matrix for non-basic variables
        A_N = tableu[:M, non_basic_indices]

        # Reduced cost
        cost = c_N - c_B @ np.linalg.inv(A_B) @ A_N
        f_v = np.sum(p)
        volume += f_v**N/np.prod(cost) * 1/δ_v
    volume /= math.factorial(N)
    return volume


In [9]:
# Load probability distributions
with open('configs/config-compas.json') as handle:
    config = json.load(handle)
with open(config['train_data_path'], 'rb') as f:
    data_dict = pickle.load(f)
    X = data_dict['X_train']
    y = data_dict['y_train']
    
c1 = config['class_1']
c2 = config['class_2']
class_1_indices = np.array(c1['indices'])
class_1_values = np.array(c1['values'])
class_2_indices = np.array(c2['indices'])
class_2_values = np.array(c2['values'])
def is_class_1(x):
    return np.allclose(x[class_1_indices], class_1_values, atol=1e-1)

def is_class_2(x):
    return np.allclose(x[class_2_indices], class_2_values, atol=1e-1)

class_1_prob = ProbabilityDensityComputer(
        X,
        config['discrete_indices'],
        config['continuous_indices'],
        config['one_hot_indices'],
        config['fixed_indices'],
        is_class_1
)

class_2_prob = ProbabilityDensityComputer(
        X,
        config['discrete_indices'],
        config['continuous_indices'],
        config['one_hot_indices'],
        config['fixed_indices'],
        is_class_2
)

In [10]:
with open('polys.pickle', 'rb') as handle:
    res = pickle.load(handle)

for k, v in res['lpi_instance'].items():
    for poly in v:
        poly.deserialize()

In [11]:
class_1_prob.sample = lambda self, *args: 1

In [12]:
def block_volume(group, i):
    lpi = res['lpi_instance'][group][i]
    fixed_indices = res['fixed_indices'][group]
    return quad.block_qhull(lpi, class_1_prob, fixed_indices)

def block_fast_volume(group, i):
    lpi = res['lpi_instance'][group][i]
    fixed_indices = res['fixed_indices'][group]
    A = lpi.get_constraints_csr().toarray()
    b = lpi.get_rhs()
    return compute_volume(A, b, fixed_indices)

def fast_volume(group, i):
    lpi = res['lpi_instance'][group][i]
    fixed_indices = res['fixed_indices'][group]
    A = lpi.get_constraints_csr().toarray()
    b = lpi.get_rhs()
    return compute_volume(A, b, fixed_indices)

def qhull_volume(group, i):
    from polytope.polytope import extreme
    lpi = res['lpi_instance'][group][i]
    fixed_indices = res['fixed_indices'][group]
    A = lpi.get_constraints_csr().toarray()
    b = lpi.get_rhs()
    N = A.shape[1]
    M = A.shape[0]
    # These ensure all the polytope is contained in the positive
    # hemisphere
    Ac = np.concatenate((A, -np.identity(N)), axis=0)
    bc = np.concatenate((b, np.zeros(N)))
    poly = Polytope(Ac, bc)
    vertices = extreme(poly)
    #print(vertices)
    return ConvexHull(vertices).volume

def get_lpi(group, i):
    return res['lpi_instance'][group][i]

In [13]:
list(class_1_prob.non_discretized_regions)

[((0.06532258064516133, 0.9508064516129032),
  (0.05, 0.95),
  (0.0, 0.0),
  (0.0, 0.0)),
 ((0.06532258064516133, 0.9508064516129032),
  (0.05, 0.95),
  (0.0, 0.0),
  (1.0, 1.0))]

In [14]:
list(class_1_prob.regions)

[((0.06532258064516133, 0.9508064516129032),
  (0.05, 0.95),
  (0.0, 0.0),
  (0.0, 0.0)),
 ((0.06532258064516133, 0.9508064516129032),
  (0.05, 0.95),
  (0.0, 0.0),
  (1.0, 1.0))]

In [15]:
class_1_prob.continuous_bounds

[((0.06532258064516133, 0.16370967741935488),
  (0.16370967741935488, 0.26209677419354843),
  (0.26209677419354843, 0.360483870967742),
  (0.360483870967742, 0.4588709677419355),
  (0.4588709677419355, 0.557258064516129),
  (0.557258064516129, 0.6556451612903226),
  (0.6556451612903226, 0.7540322580645161),
  (0.7540322580645161, 0.8524193548387097),
  (0.8524193548387097, 0.9508064516129032)),
 ((0.05, 0.15000000000000002),
  (0.15000000000000002, 0.25),
  (0.25, 0.35000000000000003),
  (0.35000000000000003, 0.45),
  (0.45, 0.55),
  (0.55, 0.6500000000000001),
  (0.6500000000000001, 0.75),
  (0.75, 0.8500000000000001),
  (0.8500000000000001, 0.95))]

In [16]:
class_1_prob.non_discretized_continuous_bounds

[[(0.06532258064516133, 0.9508064516129032)], [(0.05, 0.95)]]

In [21]:
if hasattr(class_1_prob, 'discretized'):
    del class_1_prob.discretized
block_volume('white', 0)

Setting column 0 to (0.06532258064516133, 0.9508064516129032)
Setting column 1 to (0.05, 0.95)
Fixing column 2 to 0.0
Setting column 0 to (0.06532258064516133, 0.9508064516129032)
Setting column 1 to (0.05, 0.95)
Fixing column 2 to 1.0


0.001085151077055335

In [22]:
class_1_prob.discretized = True
block_volume('white', 0)

Setting column 0 to (0.06532258064516133, 0.16370967741935488)
Setting column 1 to (0.05, 0.15000000000000002)
Fixing column 2 to 0.0
Setting column 0 to (0.06532258064516133, 0.16370967741935488)
Setting column 1 to (0.05, 0.15000000000000002)
Fixing column 2 to 1.0
Setting column 0 to (0.06532258064516133, 0.16370967741935488)
Setting column 1 to (0.15000000000000002, 0.25)
Fixing column 2 to 0.0
Setting column 0 to (0.06532258064516133, 0.16370967741935488)
Setting column 1 to (0.15000000000000002, 0.25)
Fixing column 2 to 1.0
Setting column 0 to (0.06532258064516133, 0.16370967741935488)
Setting column 1 to (0.25, 0.35000000000000003)
Fixing column 2 to 0.0
Setting column 0 to (0.06532258064516133, 0.16370967741935488)
Setting column 1 to (0.25, 0.35000000000000003)
Fixing column 2 to 1.0
Setting column 0 to (0.06532258064516133, 0.16370967741935488)
Setting column 1 to (0.35000000000000003, 0.45)
Fixing column 2 to 0.0
Setting column 0 to (0.06532258064516133, 0.16370967741935488)

0.0010851510770553212

In [18]:
fast_volume('white', 1)

0.12761229521064324

In [19]:
qhull_volume('white', 15)

IndexError: tuple index out of range

In [None]:
b_vols = np.array([block_volume('white', i) for i in range(14)])
q_vols = np.array([qhull_volume('white', i) for i in range(14)])
l_vols = np.array([fast_volume('white', i) for i in range(14)])

In [None]:
q_vols

In [None]:
l_vols

In [None]:
b_vols

In [None]:
(b_vols - q_vols)/q_vols

In [None]:
l_vols

In [None]:
print(list(class_1_prob.regions))