In [44]:
# Import necessary modules/libraries
from tqdm import tqdm
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import ipywidgets as widgets
from IPython.display import display
import plotly.express as px
import plotly.graph_objects as go

In [45]:
from utils import expected
def fair_opt_step(A, B, u_plus, u_minus, c_plus, c_minus, alpha):
    A = np.asarray(A)
    B = np.asarray(B)
    np.random.seed(1)

    w_a = len(A) / (len(A) + len(B))
    w_b = 1 - w_a

    mean_A, mean_B, util_A, util_B = change(A, B, c_plus, c_minus, u_plus, u_minus)

    fairness_diff = np.abs(mean_A - mean_B)
    total_util = w_a * util_A + w_b * util_B

    # Apply fairness constraint
    total_util_masked = np.where(fairness_diff <= alpha, total_util, -np.inf)

    # Find best pair (max utility under fairness constraint)

    flat_idx = np.argmax(total_util_masked)
    i, j = np.unravel_index(flat_idx, total_util.shape)

    
    '''
    k, l = np.unravel_index(np.argmax(total_util), total_util.shape)
    bench = -np.inf
    best_idx = None
    epsilon = 1.08

    for i in reversed(range(total_util_masked.shape[0])):
        val = total_util_masked[i, l]
        if val > bench * epsilon:
            bench = val
            best_idx = (i, l)


    if best_idx is not None:
        i, j = best_idx
    else:
        flat_idx = np.argmax(total_util_masked)
        i, j = np.unravel_index(flat_idx, total_util.shape)
    '''

    opt_A = A[i]
    opt_B = B[j]
    updated_samples = (mean_A[i, j], mean_B[i, j])
    max_util = total_util_masked[i, j]

    return (opt_A, opt_B, max_util, updated_samples)

def change(A, B, c_plus, c_minus, u_plus, u_minus, prob=0.4):
    A = np.asarray(A)
    B = np.asarray(B)

    delta_A = expected(A, c_plus, c_minus)
    delta_B = expected(B, c_plus, c_minus)

    A_matrix = A[:, None]  # shape (n, 1)
    B_matrix = B[:, None]  # shape (m, 1)

    delta_A_matrix = delta_A[:, None]
    delta_B_matrix = delta_B[:, None]

    # Add small jitter to break ties
    #jitter_A = np.random.choice([1e-8, -1e-8], size=A.shape, p=[0.4, 0.6])
    #jitter_B = np.random.choice([1e-8, -1e-8], size=B.shape, p=[0.4, 0.6])

    A_matrix_adj = np.where(A_matrix > A_matrix.T, A_matrix + delta_A_matrix, A_matrix)
    B_matrix_adj = np.where(B_matrix > B_matrix.T, B_matrix + delta_B_matrix, B_matrix)

    # Break ties
    #A_matrix_adj = np.where(A_matrix + delta_A_matrix == A_matrix.T, A_matrix + jitter_A[:, None], A_matrix_adj)
    #B_matrix_adj = np.where(B_matrix + delta_B_matrix == B_matrix.T, B_matrix + jitter_B[:, None], B_matrix_adj)

    mean_A = np.mean(A_matrix_adj, axis=0)
    mean_B = np.mean(B_matrix_adj, axis=0)

    util_A = np.sum(expected(A_matrix_adj, u_plus, u_minus), axis=0)
    util_B = np.sum(expected(B_matrix_adj, u_plus, u_minus), axis=0)

    # Convert to meshgrids for threshold pairs
    mean_A_grid, mean_B_grid = np.meshgrid(mean_A, mean_B, indexing='ij')
    util_A_grid, util_B_grid = np.meshgrid(util_A, util_B, indexing='ij')

    return mean_A_grid, mean_B_grid, util_A_grid, util_B_grid


In [46]:
# Set experiment parameters
np.random.seed(1)

# Distribution parameters
mean_a, std_a = 0.5, 1
mean_b, std_b = 0.0, 1

# Domain of alpha values to test
alpha_min, alpha_max = 0.01, 1
alphas = np.linspace(alpha_min, alpha_max, num=100)

# Sample size of each distribution
n = 2500

# Generate samples from normal distributions
a = np.random.normal(mean_a, std_a, n)
b = np.random.normal(mean_b, std_b, n)
a.sort(), b.sort()
w_a = len(a) / (len(a) + len(b))
w_b = 1 - w_a

#Step Parameters

# U+ / U- > C+ / C-
u_plus = 1
u_minus = -1.1
c_plus = 1
c_minus = -0.4
print( "Assumption 2: U+ / U- > C+ / C- is", (u_plus / u_minus) > (c_plus / c_minus) )

Assumption 2: U+ / U- > C+ / C- is True


In [47]:
# Single step optimization
from utils import opt_threshold, opt_step, expected #fair_opt_step

x_alphas = []
y_mean_A, y_mean_B = [], []
y_thresh_A, y_thresh_B = [], []
y_util = []
y_pof = [] # Price of fairness


y_opt_util = []

B_temp = None

opt_util_A, opt_util_B = opt_step(a, u_plus, u_minus, c_plus, c_minus)[0], opt_step(b, u_plus, u_minus, c_plus, c_minus)[0]
opt_util_A, opt_util_B  = expected(opt_util_A, 1, -1.1), expected(opt_util_B, 1, -1.1)
opt_util_A, opt_util_B = np.sum(opt_util_A), np.sum(opt_util_B)
y_opt = w_a * opt_util_A + w_b * opt_util_B

for alpha in tqdm(alphas):
    results = fair_opt_step(a, b, u_plus, u_minus, c_plus, c_minus, alpha)
    thresh_A, thresh_B, max_util, (A, B) = results
    temp_A = np.where(a>thresh_A, a+expected(a, c_plus, c_minus), a)
    temp_B = np.where(b>thresh_B, b+expected(b, c_plus, c_minus), b)
    #if np.mean(temp_A) != A or np.mean(temp_B) != B:
        #print(f'{np.mean(temp_A)} vs {A}')
        #print(f'{np.mean(temp_B)} vs {B}')
        #break
    if np.abs(A-B) > alpha:
        continue
    else:
        x_alphas.append(alpha)
        y_mean_A.append(A)
        y_mean_B.append(B)
        y_thresh_A.append(thresh_A)
        y_thresh_B.append(thresh_B)
        y_util.append(max_util)
        pof = 1 - (max_util/y_opt)
        y_pof.append(pof)

100%|██████████| 100/100 [01:48<00:00,  1.08s/it]


In [48]:
def plot_graphs():
    
    # Traces
    thresholds_A = go.Scatter(x=x_alphas, y=y_thresh_A, mode='markers', name="Fair Threshold (A)")
    thresholds_B = go.Scatter(x=x_alphas, y=y_thresh_B, mode='markers', name="Fair Threshold (B)") 
    utilities = go.Scatter(x=x_alphas, y=y_util, mode='markers', name='Utility', yaxis='y2')

    # Create figure
    fig = go.Figure(data=[thresholds_A, thresholds_B, utilities])

    # Add toggle buttons
    fig.update_layout(
        height = 600,
        title='Single Step Thresholding Policy',
        xaxis=dict(title="Alpha"),
        yaxis=dict(title="Threshold"),
        yaxis2=dict(
            title="Utility",
            overlaying="y",
            side="right"
        ),
        legend=dict(
            x=0,          # Right edge of the plotting area
            #y=1,          # Top of the plotting area
            xanchor='right',   # Legend's left edge aligns at x=1
            yanchor='top'     # Legend's top edge aligns at y=1
        ),
        showlegend=True,
    )

    fig.show()
plot_graphs()

In [49]:
# Experiment 2
from experiment_2 import experiment_2
test_alpha = 0.52
ex2a, ex2b = experiment_2(a, b, u_plus, u_minus, c_plus, c_minus, test_alpha, w_a, w_b, thresh_B, a, alphas)
ex2a.update_layout(
    xaxis_title ='Threshold A (using samples)'
)
ex2a.show()
#ex2b.show()

trace1 = ex2a.data[1]
trace2 = ex2a.data[2]
mean_diffs = dict(zip(trace1.x, trace1.y))
utilities = dict(zip(trace2.x, trace2.y))

y_thresh_A = np.array(y_thresh_A)
y_thresh_B = np.array(y_thresh_B)
x_alphas = np.array(x_alphas)
indices = np.where(y_thresh_A < y_thresh_B)[0]
test_alphas  = x_alphas[indices]
print(test_alphas)
#test_alphas = np.arange(0.54, 0.63, 0.01)
results = []

for test_alpha in test_alphas:
    # Filter mean_diffs under current threshold
    filtered = {k: v for k, v in mean_diffs.items() if v < test_alpha}

    # Find before and after keys
    before_keys = [k for k in filtered if k < 0]
    after_keys = [k for k in filtered if k > 0]

    before = max(before_keys) if before_keys else None
    after = min(after_keys) if after_keys else None

    if before is not None and after is not None:
        # Prepare data
        utility_before = utilities[before]
        utility_after = utilities[after]
        utility_diff = np.abs(utility_after - utility_before)

        # Build vertical block
        block = pd.DataFrame([
            {
                'Test Alpha': round(test_alpha, 3),
                'Side': 'Negative',
                'Threshold': before,
                'Mean Difference': mean_diffs[before],
                'Utility': utility_before
            },
            {
                'Test Alpha': round(test_alpha, 3),
                'Side': 'Positive',
                'Threshold': after,
                'Mean Difference': mean_diffs[after],
                'Utility': utility_after
            },
            {
                'Test Alpha': round(test_alpha, 3),
                'Side': 'Δ Utility',
                'Threshold': '',
                'Mean Difference': '',
                'Utility': utility_diff
            }
        ])

        print(block.to_string(index=False))
        print("--------")


100%|██████████| 2500/2500 [00:00<00:00, 9818.58it/s] 


[]


In [50]:
'''
t_x = []
t_A, t_B = [], []

# Precompute expected values
d_A = expected(a, c_plus, c_minus)
d_B = expected(b, c_plus, c_minus)
u_A = expected(a, u_plus, u_minus)
u_B = expected(b, u_plus, u_minus)

for k in tqdm(alphas):
    t_util = -np.inf
    thresh_A = None
    thresh_B = None

    for i in a:
        for j in b:
            A = np.where(a > i, a + d_A, a)
            B = np.where(b > j, b + d_B, b)
            util = w_a * np.sum(A) + w_b * np.sum(B)

            # Enforce similarity constraint
            if np.abs(np.mean(A) - np.mean(B)) > k:
                continue

            if util > t_util:
                t_util = util
                thresh_A = i
                thresh_B = j

    if thresh_A is not None and thresh_B is not None:
        t_x.append(k)
        t_A.append(thresh_A)
        t_B.append(thresh_B)

# Plotting after loop
plt.plot(t_x, t_A, label='Threshold A')
plt.plot(t_x, t_B, label='Threshold B')
plt.xlabel("Alpha (k)")
plt.ylabel("Thresholds")
plt.legend()
plt.show()
'''

'\nt_x = []\nt_A, t_B = [], []\n\n# Precompute expected values\nd_A = expected(a, c_plus, c_minus)\nd_B = expected(b, c_plus, c_minus)\nu_A = expected(a, u_plus, u_minus)\nu_B = expected(b, u_plus, u_minus)\n\nfor k in tqdm(alphas):\n    t_util = -np.inf\n    thresh_A = None\n    thresh_B = None\n\n    for i in a:\n        for j in b:\n            A = np.where(a > i, a + d_A, a)\n            B = np.where(b > j, b + d_B, b)\n            util = w_a * np.sum(A) + w_b * np.sum(B)\n\n            # Enforce similarity constraint\n            if np.abs(np.mean(A) - np.mean(B)) > k:\n                continue\n\n            if util > t_util:\n                t_util = util\n                thresh_A = i\n                thresh_B = j\n\n    if thresh_A is not None and thresh_B is not None:\n        t_x.append(k)\n        t_A.append(thresh_A)\n        t_B.append(thresh_B)\n\n# Plotting after loop\nplt.plot(t_x, t_A, label=\'Threshold A\')\nplt.plot(t_x, t_B, label=\'Threshold B\')\nplt.xlabel("Alpha

In [51]:
threshA = go.Scatter(x=t_x, y=t_A, mode='markers', name="Threshold A")
threshB = go.Scatter(x=t_x, y=t_B, mode='markers', name="Threshold B")
fig = go.Figure(data=[threshA, threshB])
fig.show()

NameError: name 't_x' is not defined