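This notebook calibrates the lambda weights (`lambda_priv` and `lambda_fair`) from the model builder's loss on the Pareto frontier, using two methods: gradients of an interpolated loss surface and quantile binning (q-cut).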

In [44]:
import numpy as np
from scipy.interpolate import griddata
import pandas as pd
import math

In [45]:
# Weight on the builder's `acc` loss term; (1 - builder_lambda) goes to the `cov` term
# when the weighted builder loss is formed further down.
builder_lambda = 0.7
# `dataset` and `method` together select the directory the loss arrays are loaded from.
dataset = 'utkface'
# The builder `cov` loss is only loaded and stacked when method == 'fairPATE'.
method = 'fairPATE'

In [46]:
# Read the pre-computed loss arrays stored for the selected dataset / method.
loss_dir = f"../previous_results/loss_functions/{dataset}/{method}"
loss_builder_acc = np.load(loss_dir+'/builder_loss_acc.npy')
if method == 'fairPATE':
    # the `cov` builder loss is only read for fairPATE
    loss_builder_cov = np.load(loss_dir+'/builder_loss_cov.npy')
loss_privacy = np.load(loss_dir+'/privacy_loss.npy')
loss_fairness = np.load(loss_dir+'/fairness_loss.npy')
priv_fair_values = np.load(loss_dir+'/priv_fair_values.npy')

# Column 0 holds the privacy value of each point, column 1 the fairness value.
priv_values, fair_values = priv_fair_values[:, 0], priv_fair_values[:, 1]

# One column per loss; the builder terms are multiplied by -1 before stacking
# (presumably so every column is a cost to minimise -- TODO confirm).
loss_columns = [-1 * loss_builder_acc, loss_privacy, loss_fairness]
if method == 'fairPATE':
    loss_columns.append(-1 * loss_builder_cov)
losses = np.squeeze(np.stack(loss_columns, axis=-1))

In [47]:
def is_pareto_efficient(costs, return_mask = False):
    """
        Locate the pareto-efficient rows of a cost matrix (lower cost is better).

        Each surviving point is used in turn as a pivot; every other point is
        kept only if it is strictly smaller than the pivot in at least one cost.
        :param costs: An (n_points, n_costs) array
        :param return_mask: True to get a boolean mask instead of indices
        :return: If return_mask is True, an (n_points, ) boolean array that is
            True at the efficient points; otherwise an (n_efficient_points, )
            integer array with the indices of the efficient points.
    """
    total = costs.shape[0]
    survivors = np.arange(total)  # original indices of the points still in play
    cursor = 0  # position (within the shrinking `costs`) of the next pivot
    while cursor < len(costs):
        keep = (costs < costs[cursor]).any(axis=1)
        keep[cursor] = True  # the pivot never eliminates itself
        survivors = survivors[keep]
        costs = costs[keep]
        # the pivot may have shifted left after filtering; step to the point after it
        cursor = np.count_nonzero(keep[:cursor]) + 1
    if not return_mask:
        return survivors
    efficient_mask = np.zeros(total, dtype = bool)
    efficient_mask[survivors] = True
    return efficient_mask

In [48]:
def get_pf(losses, priv_values, fair_values):
    """
        Keep only the points that lie on the Pareto frontier of `losses`.
        :param losses: An (n_points, n_losses) array
        :param priv_values: per-point values, indexed the same way as `losses`
        :param fair_values: per-point values, indexed the same way as `losses`
        :return: (pf_losses, pf_priv, pf_fair, pf_indices) -- the three inputs
            restricted to the frontier, plus the indices that were selected
    """
    pf_indices = is_pareto_efficient(losses)
    return (
        losses[pf_indices, :],
        priv_values[pf_indices],
        fair_values[pf_indices],
        pf_indices,
    )

In [49]:
# interpolate to get surface
def interpolate_losses(losses, priv_values, fair_values, n_grid=50):
    '''
        Interpolate scattered loss values onto a regular 2D grid.
        :param losses: (n_points, ) loss value at each scattered point
        :param priv_values: (n_points, ) x-coordinate of each point
            (privacy axis, e.g. epsilon or the privacy loss)
        :param fair_values: (n_points, ) y-coordinate of each point
            (fairness axis, e.g. gamma or the fairness loss)
        :param n_grid: number of grid nodes along each axis (default 50)
        :return: (losses_inter, xi, yi) where xi / yi are the n_grid evenly
            spaced coordinates spanning the range of priv_values / fair_values
            and losses_inter is an (n_grid, n_grid) array whose rows follow yi
            and whose columns follow xi. Grid nodes outside the convex hull of
            the input points are NaN.
    '''
    x = np.asarray(priv_values)
    y = np.asarray(fair_values)
    xi = np.linspace(x.min(), x.max(), n_grid)
    yi = np.linspace(y.min(), y.max(), n_grid)
    # default 'xy' indexing: X varies along columns, Y along rows
    X, Y = np.meshgrid(xi, yi)
    losses_inter = griddata((x, y), losses, (X, Y), method='linear')

    return losses_inter, xi, yi

In [50]:
# get points on pf
pf_losses, pf_priv, pf_fair, pf_indices = get_pf(losses, priv_values, fair_values)
# NOTE: from here on loss_privacy / loss_fairness refer to the frontier points only
# (columns 1 and 2 of the stacked losses), not to the raw arrays loaded above.
loss_privacy = pf_losses[:, 1]
loss_fairness = pf_losses[:, 2]
# Column 3 (the negated builder `cov` loss) is only stacked for fairPATE; fail with an
# actionable message instead of an opaque IndexError for other methods.
if pf_losses.shape[1] < 4:
    raise ValueError(
        "loss_builder_weighted needs the builder `cov` loss in column 3 of `losses`, "
        "which is only stacked when method == 'fairPATE'"
    )
# Weighted builder loss: column 0 is the negated `acc` loss, column 3 the negated `cov` loss.
# The 0.01 factor presumably rescales accuracy from percent to a fraction -- TODO confirm.
loss_builder_weighted = builder_lambda *0.01 * pf_losses[:, 0] + (1-builder_lambda) * pf_losses[:, 3]

# Method 1: using gradients

In [51]:
# Interpolate the weighted builder loss over the (privacy loss, fairness loss) plane.
# pi / fi are the grid coordinates along the privacy / fairness axis respectively.
interpolated_loss_b, pi, fi = interpolate_losses(loss_builder_weighted, loss_privacy, loss_fairness)

In [None]:
# Grid spacing along the fairness-loss axis (displayed for inspection only).
fi[1] - fi[0]

In [53]:
# gradient
# Rows of interpolated_loss_b follow fi and columns follow pi (np.meshgrid's default
# indexing inside interpolate_losses), so the spacings are passed in (fi, pi) order:
# grad_builder[0] is the derivative along the fairness-loss axis and
# grad_builder[1] the derivative along the privacy-loss axis.
grad_builder = np.gradient(interpolated_loss_b, fi[1] - fi[0], pi[1] - pi[0])

In [11]:
def _negated_mean_gradient(grad):
    """
        Average a 2D gradient grid while ignoring NaN cells, then flip the sign.

        The grid is first averaged row by row (NaN cells masked out); rows that
        are entirely NaN are dropped, and the remaining row means are averaged.
        :param grad: 2D array of gradient values, possibly containing NaN
        :return: minus the mean of the per-row means
    """
    masked = np.ma.masked_array(grad, np.isnan(grad))
    # fully masked rows come back masked; turn them into NaN so they can be dropped
    row_means = np.ma.average(masked, axis=1).filled(np.nan)
    return -np.average(row_means[~np.isnan(row_means)])


# calculate lambda_priv: gradient along the privacy-loss axis
grad_priv = grad_builder[1]
lambda_priv = _negated_mean_gradient(grad_priv)

# calculate lambda_fair: gradient along the fairness-loss axis
grad_fair = grad_builder[0]
lambda_fair = _negated_mean_gradient(grad_fair)

In [None]:
# Method 1 estimate: negated mean gradient of the builder loss along the privacy-loss axis.
lambda_priv

In [None]:
# Method 1 estimate: negated mean gradient of the builder loss along the fairness-loss axis.
lambda_fair

# Method 2: using q-cut

In [67]:
# Assign every frontier point to a quantile bin of its fairness loss (up to 20 bins)
# and of its privacy loss (up to 12 bins). labels=False yields integer bin ids.
# duplicates='drop' merges bins whose quantile edges coincide instead of raising
# ValueError, so fewer bins than requested may come back.
fair_cut = pd.qcut(loss_fairness, 20, labels=False, duplicates='drop')
priv_cut = pd.qcut(loss_privacy, 12, labels=False, duplicates='drop')

In [None]:
# Per fairness bin, estimate lambda_priv as the mean pairwise slope
# -(delta builder loss) / (delta privacy loss) over all point pairs in the bin.
lambdas_priv = []
# Iterate over the bin ids that actually exist rather than a fixed range(10),
# which silently ignored every bin past the tenth.
for bin_id in np.unique(fair_cut):
    # for each bin, select all points within
    in_bin = fair_cut == bin_id
    bin_build = loss_builder_weighted[in_bin]
    bin_priv = loss_privacy[in_bin]
    slopes = []
    # calculate all pairs (j, k) with k > j
    for j in range(len(bin_build)-1):
        # equal privacy losses give a zero denominator; silence the warning and filter below
        with np.errstate(divide='ignore', invalid='ignore'):
            pair_slopes = (bin_build[j+1:] - bin_build[j]) / (bin_priv[j] - bin_priv[j+1:])
        # drop -inf, +inf and NaN so a degenerate pair cannot poison the bin average
        slopes.extend(pair_slopes[np.isfinite(pair_slopes)])
    # bins with fewer than two points (or only degenerate pairs) carry no slope information
    if slopes:
        lambdas_priv.append(sum(slopes)/len(slopes))

In [None]:
# Method 2 estimate of lambda_priv: plain mean of the per-bin averages.
sum(lambdas_priv)/len(lambdas_priv)

In [None]:
# Per privacy bin, estimate lambda_fair as the mean pairwise slope
# -(delta builder loss) / (delta fairness loss) over all point pairs in the bin.
lambdas_fair = []
# Iterate over the bin ids that actually exist rather than a fixed range(10),
# which ignored bins past the tenth and failed on an empty selection.
for bin_id in np.unique(priv_cut):
    # for each bin, select all points within
    in_bin = priv_cut == bin_id
    bin_build = loss_builder_weighted[in_bin]
    bin_fair = loss_fairness[in_bin]
    slopes = []
    # calculate all pairs (j, k) with k > j
    for j in range(len(bin_build)-1):
        # equal fairness losses give a zero denominator; silence the warning and filter below
        with np.errstate(divide='ignore', invalid='ignore'):
            pair_slopes = (bin_build[j+1:] - bin_build[j]) / (bin_fair[j] - bin_fair[j+1:])
        # drop -inf, +inf and NaN so a degenerate pair cannot poison the bin average
        slopes.extend(pair_slopes[np.isfinite(pair_slopes)])
    # bins with fewer than two points (or only degenerate pairs) carry no slope information
    if slopes:
        lambdas_fair.append(sum(slopes)/len(slopes))

In [None]:
# Method 2 estimate of lambda_fair: plain mean of the per-bin averages.
sum(lambdas_fair)/len(lambdas_fair)