This notebook replicates the synthetic data experiments.

In [None]:
import numpy as np
import sys
sys.path.append("..")
import os
from utils import generate_synthetic_LTR_data
from fair_training_ranking_xgb import train_fair_nn
from sklearn.linear_model import LogisticRegression
import tensorflow.compat.v1 as tf
tf.disable_v2_behavior()
import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits.axes_grid1 import ImageGrid

We need to delete these directories, if they exist, so that plotting works properly.

In [None]:
# Remove the heatmaps directory left behind by a previous run.
# ignore_errors=True makes this a no-op when the directory does not exist
# (it also suppresses any other error raised while deleting).
import shutil
shutil.rmtree('heatmaps', ignore_errors=True)

In [None]:
# Remove the tensorboard_simulations directory left behind by a previous run.
# ignore_errors=True makes this a no-op when the directory does not exist
# (it also suppresses any other error raised while deleting).
import shutil
shutil.rmtree('tensorboard_simulations', ignore_errors=True)

In [None]:
# Remove the results_simulations directory left behind by a previous run.
# ignore_errors=True makes this a no-op when the directory does not exist
# (it also suppresses any other error raised while deleting).
import shutil
shutil.rmtree('results_simulations', ignore_errors=True)

In [None]:
# Create synthetic train/test data and persist it under data/.
# The train and test sets come from two separate calls to
# generate_synthetic_LTR_data with identical settings.
num_docs_per_query = 10
num_queries = 100
X_queries, relevances, majority_status = generate_synthetic_LTR_data(
    num_queries=num_queries, num_docs_per_query=num_docs_per_query)
X_queries_test, relevances_test, majority_status_test = generate_synthetic_LTR_data(
    num_queries=num_queries, num_docs_per_query=num_docs_per_query)

# exist_ok=True replaces the check-then-create pattern and keeps the cell
# safely re-runnable when data/ is already present.
os.makedirs('data', exist_ok=True)

# data/X.npy and data/relevance.npy are reloaded by the plotting cell.
np.save('data/X.npy', X_queries)
np.save('data/relevance.npy', relevances)
np.save('data/majority_status.npy', majority_status)
np.save('data/X_test.npy', X_queries_test)
np.save('data/relevance_test.npy', relevances_test)
np.save('data/majority_status_test.npy', majority_status_test)

In [None]:
# ---------------------------------------------------------------------------
# Baseline
# ---------------------------------------------------------------------------
# Fit a logistic regression of majority_status on the query features; its
# coefficient matrix is what gets passed to train_fair_nn as sens_directions.
LR = LogisticRegression(C = 100).fit(X_queries, majority_status)
sens_directions = LR.coef_
print('sensitive directions', sens_directions)

# Keyword arguments that take the same value in the baseline run and in every
# SenSeI run. n_units is deliberately left out so each call still receives its
# own fresh empty list.
shared_train_kwargs = dict(
    X_test=X_queries_test,
    relevance_test=relevances_test,
    group_membership_test=majority_status_test,
    num_items_per_query=num_docs_per_query,
    weights=None,
    lr=0.04,
    batch_size=1,
    epoch=20*num_queries,
    verbose=True,
    activ_f=tf.nn.relu,
    l2_reg=0.0,
    plot=True,
    sens_directions=sens_directions,
    seed=None,
    simul=True,  # need to make this true if you want to make plots
    num_monte_carlo_samples=10,
    bias=False,
    init_range=.0001,
    entropy_regularizer=.0,
    baseline_ndcg=True,
)

_ = train_fair_nn(
    X_queries,
    relevances,
    majority_status,
    tf_prefix='baseline',
    n_units=[],
    fair_start=1.,
    load=False,
    **shared_train_kwargs,
)

# ---------------------------------------------------------------------------
# Train SenSeI with different fair regularization strengths
# ---------------------------------------------------------------------------
for fair_reg in (.0003, .001):
    # Start every run from an empty TensorFlow default graph.
    tf.reset_default_graph()
    print('fair_reg',fair_reg)
    _ = train_fair_nn(
        X_queries,
        relevances,
        majority_status,
        tf_prefix='sensei',
        n_units=[],
        lamb_init=2.,
        adv_epoch=20,
        adv_step=.001,
        epsilon=0.001,
        l2_attack=0.001,
        adv_epoch_full=20,
        fair_reg=fair_reg,
        fair_start=0.,
        load=True,
        **shared_train_kwargs,
    )



In [None]:
# Run Fair-PG-Rank by executing PG.py through the shell (uses whichever
# `python` the shell resolves).
# NOTE(review): the PG_heatmap file loaded in the plotting cell is presumably
# written by this script — confirm against PG.py.
!python PG.py

In [None]:
# plot results
# Load the training points and the pre-computed surfaces for each setting.
# data/X.npy and data/relevance.npy are written by the data-generation cell.
# NOTE(review): the xx / yy / Z_* files are not written by any visible cell;
# presumably train_fair_nn(simul=True) produces them — confirm.
X = np.load('data/X.npy')
relevances = np.load('data/relevance.npy')
xx = np.load('data/xx.npy')
yy = np.load('data/yy.npy')
Z = np.load('data/Z_0.0.npy').reshape(xx.shape)
Z_2 = np.load('data/Z_0.0003.npy').reshape(xx.shape)
Z_3 = np.load('data/Z_0.001.npy').reshape(xx.shape)

# Blue star: among the points with X[:, 1] == 0 and X[:, 0] < 1.6 (the
# "minority" items, per the original comment), pick the first one with the
# highest relevance. Only relevances strictly greater than 0 can be selected.
max_relevance = 0
max_relevance_idx = 0
for i in range(X.shape[0]):
    if X[i,1] == 0 and X[i,0] < 1.6:
        if max_relevance < relevances[i]:
            max_relevance_idx = i
            max_relevance = relevances[i]
print(max_relevance)

# Black star ("majority" item): first point with X[:, 1] > 2.9 and
# 1.2 < X[:, 0] < 1.7 whose relevance is within 5 of max_relevance.
# NOTE(review): max_relevance is reset to 0 just before this search, so the
# comparison is against 0 rather than the value found above — confirm that
# this is intended.
max_relevance = 0
max_relevance_idx_majority = 0
for i in range(X.shape[0]):
    if X[i,1] > 2.9 and 1.2 < X[i,0] < 1.7:
        if np.abs(relevances[i] - max_relevance) < 5:
            max_relevance_idx_majority = i
            break
# Report the relevance of the point that is actually highlighted; indexing
# with the loop variable would print the last row when nothing matched.
print(relevances[max_relevance_idx_majority])

fig = plt.figure(figsize=(15, 5))

grid = ImageGrid(fig, 111,          # as in plt.subplot(111)
                 nrows_ncols=(1,3),
                 axes_pad=0.15,
                 share_all=True,
                 cbar_location="right",
                 cbar_mode="single",
                 cbar_size="7%",
                 cbar_pad=0.15,
                 )

cm = 'PiYG_r'

plt.rc('xtick', labelsize=20)
plt.rc('ytick', labelsize=20)

# One (surface, title) pair per panel, left to right.
panels = [
    (Z, 'Baseline: $\\rho=0$'),
    (Z_2, 'SenSTIR: $\\rho=.0003$'),  # Final group_exposure_test_stochastic 0.0103
    (Z_3, 'SenSTIR: $\\rho=.001$'),   # Final group_exposure_test_stochastic 0.008
]
for idx, (surface, title) in enumerate(panels):
    ax = grid[idx]
    contour = ax.contourf(xx, yy, surface, cmap='bwr', alpha=.8)
    ax.set_title(title, fontsize = 25)
    # All points, coloured by relevance.
    contour2 = ax.scatter(X[:,0], X[:,1], cmap=cm, c=relevances)
    # The two highlighted items found above.
    ax.scatter(X[max_relevance_idx, 0], 0, marker = '*', s=250, color = 'blue')
    ax.scatter(X[max_relevance_idx_majority, 0], X[max_relevance_idx_majority, 1],
               marker = '*', s=300, color = 'black')

# Single shared colorbar, driven by the relevance scatter of the last panel.
grid[2].cax.colorbar(contour2)
grid[2].cax.toggle_label(True)

plt.savefig('synthetic.pdf')

# Heatmap figure: one panel per method, all drawn on a shared colour scale.
fig = plt.figure(figsize=(15, 4))

grid = ImageGrid(fig, 111,          # as in plt.subplot(111)
                 nrows_ncols=(1,4),
                 axes_pad=0.15,
                 share_all=True,
                 cbar_location="right",
                 cbar_mode="single",
                 cbar_size="7%",
                 cbar_pad=0.15,
                 )

plt.rc('xtick', labelsize=15)
plt.rc('ytick', labelsize=15)

# The file names encode the hyper-parameters of the run that produced them,
# so they must match the settings used in the training cells.
baseline_heatmap = np.load('heatmaps/baseline_adv-epoch:100_batch_size:1_adv-step:1.0_l2_attack:0.01_adv_epoch_full:10_epsilon:None_lr:0.04_MC:10_reg:0.0_epoch:2000_l2reg:0.0_init_range:0.0001_arch:_heatmap_test_stochastic_0.npy')
sensei_heatmap_1 = np.load('heatmaps/sensei_adv-epoch:20_batch_size:1_adv-step:0.001_l2_attack:0.001_adv_epoch_full:20_epsilon:0.001_lr:0.04_MC:10_reg:0.0003_epoch:2000_l2reg:0.0_init_range:0.0001_arch:_heatmap_test_stochastic_0.npy')
sensei_heatmap_2 = np.load('heatmaps/sensei_adv-epoch:20_batch_size:1_adv-step:0.001_l2_attack:0.001_adv_epoch_full:20_epsilon:0.001_lr:0.04_MC:10_reg:0.001_epoch:2000_l2reg:0.0_init_range:0.0001_arch:_heatmap_test_stochastic_0.npy')
PG_heatmap = np.load('heatmaps/sensei_adv-epoch:100_batch_size:1_adv-step:1.0_l2_attack:0.01_adv_epoch_full:10_epsilon:None_lr:0.04_MC:10_reg:0.0_epoch:0_l2reg:0.0_init_range:0.0001_arch:_heatmap_test_stochastic_0.npy')

# Common colour limits across all four heatmaps. Deliberately not named
# min / max: that would shadow the Python builtins for the rest of the
# kernel session.
all_heatmap_values = np.concatenate((baseline_heatmap, sensei_heatmap_1, sensei_heatmap_2, PG_heatmap))
heat_min = np.min(all_heatmap_values)
heat_max = np.max(all_heatmap_values)

# Ticks sit at positions 0..9 and are labelled 1..10.
# NOTE(review): assumes 10x10 heatmaps (num_docs_per_query = 10) — confirm.
tick_positions = np.arange(10)
tick_labels = np.arange(1, 11)

# (grid slot, title, heatmap), listed in the order the panels are drawn.
heatmap_panels = [
    (0, 'Baseline: $\\rho=0$', baseline_heatmap),
    (2, 'SenSTIR: $\\rho=.0003$', sensei_heatmap_1),
    (3, 'SenSTIR: $\\rho=.001$', sensei_heatmap_2),
    (1, 'Fair-PG-Rank: $\\lambda=25$', PG_heatmap),
]
images = {}
for grid_idx, title, heatmap in heatmap_panels:
    ax = grid[grid_idx]
    ax.set_title(title, fontsize = 17)
    ax.set_xticks(tick_positions)
    ax.set_xticklabels(tick_labels)
    ax.set_yticks(tick_positions)
    ax.set_yticklabels(tick_labels)
    images[grid_idx] = ax.imshow(heatmap, vmin=heat_min, vmax=heat_max, aspect='auto')

# Single shared colorbar, taken from the right-most panel's image.
im = images[3]
grid[3].cax.colorbar(im)
grid[3].cax.toggle_label(True)

plt.savefig('heatmap.pdf')