In [1]:
import matplotlib.pyplot as plt
%matplotlib inline
import numpy as np
from itertools import combinations, permutations
from time import time
import pandas as pd
from numpy.random import random
import warnings
warnings.filterwarnings('ignore')
from scipy.misc import comb
from scipy import stats
from scipy.spatial import distance
from agg_util import *
from fare.metrics import *
import seaborn as sns
# from ir_metrics import *
import fairsearchcore as fsc
# from yang_metrics import *
# from dataGenerator import generateUnfairRanking

In [2]:
from bokeh.io import output_notebook, show
from bokeh.layouts import row, column, widgetbox, gridplot
from bokeh.models import CustomJS, Slider, Span, Label, LinearColorMapper, ColumnDataSource, CategoricalColorMapper, RangeTool, Range1d, LabelSet
from bokeh.plotting import figure, output_file, show, reset_output
from bokeh.transform import linear_cmap, cumsum
from bokeh.palettes import brewer
from bokeh.colors import RGB
from matplotlib import cm

# Send Bokeh `show()` output to the notebook's output cells.
output_notebook()

In [3]:
# Read the sampled Mallows rankings into a plain 2-D NumPy array.
# `DataFrame.as_matrix()` was removed in pandas 1.0; `.values` returns the
# same array and works on old and new pandas alike.
# NOTE(review): read_csv treats the first line of the file as a header by
# default -- confirm the file really has one, otherwise a ranking is lost.
r_data = pd.read_csv("data/R_n50_N10_t0.1.txt").values

In [4]:
# Hex colours reused by the Bokeh plots in this notebook.
pink_group = '#8E0152'   # 'on' points in the scatter; 'Group B' wedge; e0 area
green_group = '#85C049'  # 'Group A' wedge; e1 area

In [5]:
# p = 0.5 -> random, fair
# p large -> unfairly favors g1
# minority group is 0
def gen_minority_group_idx(n, p, ratio):
    """Randomly assign a group label (0 or 1) to each of ``n`` positions.

    ``int(n * ratio)`` positions receive label 1 and the rest receive label 0.
    Positions are filled front to back: while both groups still have members
    left, the next position is given label 1 with probability ``p`` and
    label 0 otherwise. Once one group is exhausted, the remaining positions
    are filled with whatever is left (1s first, then 0s).

    Parameters
    ----------
    n : int
        Total number of positions to label.
    p : float
        Probability of emitting a 1 at each step while both groups remain.
    ratio : float
        Fraction of the ``n`` positions that belong to group 1.

    Returns
    -------
    (numpy.ndarray, int)
        The integer label array and the number of 0 labels it contains.
    """
    remaining1 = int(n * ratio)
    remaining0 = n - remaining1
    # Every group-0 slot is eventually emitted, so the count is known up front.
    # Keeping it in its own name avoids clobbering the `n` parameter.
    minority_count = max(remaining0, 0)

    groups = []
    while remaining1 > 0 and remaining0 > 0:
        if random() < p:
            groups.append(1)
            remaining1 -= 1
        else:
            groups.append(0)
            remaining0 -= 1

    # One group ran out: flush the leftovers (a non-positive count adds nothing).
    groups.extend([1] * remaining1)
    groups.extend([0] * remaining0)
    return np.array(groups, dtype=int), minority_count

In [6]:
# Candidate rankings: `n_ranks` independent random orderings of the item
# ids 0 .. rank_len-1, kept as a list of 1-D arrays.
n_ranks = 100
rank_len = 50

ranks = [np.arange(rank_len) for _slot in range(n_ranks)]
for ranking in ranks:
    np.random.shuffle(ranking)  # shuffles in place

np.shape(ranks)

(100, 50)

In [7]:
# One uniform random score per item, arranged from largest to smallest so
# that position k of `scores` is the k-th best score.
n_ranks = 100
rank_len = 50

s = np.random.uniform(low=0.0, high=1.0, size=rank_len)
r = np.argsort(s)      # indices that sort the raw scores ascending
scores = s[r][::-1]    # reverse to get descending order
In [8]:
# For each candidate ranking, add up kendalltau_dist(candidate, base) over
# every row of r_data.
dists = [
    np.sum([kendalltau_dist(candidate, base) for base in r_data])
    for candidate in ranks
]

In [9]:
def scale(vals):
    """Min-max rescale ``vals`` to the interval [0, 1].

    Parameters
    ----------
    vals : sequence of numbers

    Returns
    -------
    list
        ``(x - min) / (max - min)`` for every ``x``. An empty input gives an
        empty list. If all values are equal there is no spread to normalise,
        so zeros are returned instead of dividing by zero.
    """
    arr = np.asarray(vals, dtype=float)
    if arr.size == 0:
        return []
    low = arr.min()
    spread = arr.max() - low
    if spread == 0:
        return np.zeros_like(arr).tolist()
    return ((arr - low) / spread).tolist()

In [10]:
# Min-max rescale the summed distances; this overwrites the raw values.
dists = scale(dists)

In [12]:
# One group label per ranked position; `m` is the number of group-0 labels.
# The length must match `rank_len`, because later cells index `groups` by
# ranking position and divide `m` by `rank_len`. Use the constant rather
# than repeating the literal 50.
groups, m = gen_minority_group_idx(rank_len, 0.7, 0.7)

In [13]:
# Absolute difference between the two values returned by rank_parity, one
# entry per candidate ranking (presumably one value per group -- confirm
# against fare.metrics).
pars = []
for ranking in ranks:
    membership = list(groups[np.argsort(ranking)])
    score0, score1 = rank_parity(ranking, membership)
    pars.append(abs(score0 - score1))

In [14]:
# rank_parity evaluated on every length-`window` slice of each ranking.
# sliding_parity0[k] / sliding_parity1[k] hold the two returned values for
# ranks[k], one entry per window start.
sliding_parity0 = []
sliding_parity1 = []
window = 10
for rank in ranks:
    e0s = []
    e1s = []
    for i in range(0, rank_len - window):
        # The slice width follows `window`; it used to be a literal 10.
        r = rank[i:i + window]
        # NOTE(review): argsort of a slice yields 0..window-1, so only
        # groups[0:window] is ever consulted here -- confirm this is intended.
        g = [groups[x] for x in np.argsort(r)]
        e0, e1 = rank_parity(r, g)
        e0s.append(e0)
        e1s.append(e1)
    # Zero-pad the tail so every series has rank_len entries; the
    # ColumnDataSource built from them needs equal-length columns.
    e0s.extend([0] * window)
    e1s.extend([0] * window)
    sliding_parity0.append(e0s)
    sliding_parity1.append(e1s)
    


In [78]:
# Scatter of every candidate ranking (x = scaled distance, y = parity gap)
# with two threshold sliders. Points below both thresholds are drawn in
# pink_group ('on'); all others are grey ('off').
colors = np.repeat('on', len(pars))
source = ColumnDataSource({'dists': dists, 'pars': pars, 'colors': colors})
plot = figure(plot_width=600, plot_height=400)
plot.yaxis.axis_label = "Rank Parity"
plot.xaxis.axis_label = "Aggregation Accuracy"

cmap_cat = CategoricalColorMapper(factors=['on', 'off'], palette=[pink_group, 'grey'])

fair_slider = Slider(start=0, end=0.5, value=np.max(pars), step=.01, title="Rank Parity")
acc_slider = Slider(start=0, end=1, value=np.max(dists), step=.01, title="Aggregation Accuracy")
# Guide lines that follow the sliders: horizontal for parity, vertical for distance.
fair_span = Span(location=fair_slider.value, dimension='width')
acc_span = Span(location=acc_slider.value, dimension='height')
plot.add_layout(fair_span)
plot.add_layout(acc_span)

s = plot.scatter('dists', 'pars', source=source, line_color=None, radius=0.01, alpha=0.7,
                 fill_color={"field": 'colors', "transform": cmap_cat})

n = len(source.data['colors'])

# Both sliders run the same recolouring logic. `cb_obj` is the slider that
# fired, so its value positions the span passed in for that slider.
recolor_js = """
    span.location = cb_obj.value;
    for(var i=0 ; i < source.data['colors'].length ; i++) {
        if(source.data['pars'][i] < f_slider.value && source.data['dists'][i] < a_slider.value) {
            source.data['colors'][i] = 'on';
        }
        else{
            source.data['colors'][i] = 'off';
        }
    }
    source.change.emit()
"""

# `Slider.callback` was removed in Bokeh 2.0; js_on_change('value', ...) is
# the supported hook and also works on the 1.x releases.
for slider, span in ((fair_slider, fair_span), (acc_slider, acc_span)):
    slider.js_on_change('value', CustomJS(
        args=dict(span=span, f_slider=fair_slider, a_slider=acc_slider, source=s.data_source),
        code=recolor_js))

# output_file("fairness_accuracy_slider.html", title="Explore Rank Space")

show(row(plot, widgetbox(fair_slider, acc_slider)))

In [99]:
# Clear Bokeh's current output settings so a later cell can pick a different
# target (an output_file(...) call appears further down).
reset_output()

In [15]:
def get_rank_source(i):
    """Build the ColumnDataSource behind the ranking bar chart.

    Items are laid out in identity order (0 .. rank_len-1). The sliding
    parity series are taken from entry ``i`` of ``sliding_parity0`` and
    ``sliding_parity1``. Previously the argument was ignored and entry 0 was
    always used; calling with ``i = 0`` gives the same result as before.

    Relies on the module-level names ``rank_len``, ``group_colors``,
    ``sliding_parity0``, ``sliding_parity1`` and ``scores``.

    Parameters
    ----------
    i : int
        Index into ``sliding_parity0`` / ``sliding_parity1``.

    Returns
    -------
    ColumnDataSource
        Columns: rank, x, y, color, label_x, label_y, names, e0, e1, scores.
    """
    # NOTE(review): the bars use the identity ranking, not ranks[i] -- confirm
    # that mixing it with ranks[i]'s parity series is intended.
    r = list(range(rank_len))  # concrete lists rather than lazy range objects
    re0 = sliding_parity0[i]
    re1 = sliding_parity1[i]
    g = [group_colors[item] for item in r]
    x = np.repeat(1, rank_len)
    y = list(range(1, rank_len + 1))
    names = [str(item + 1) for item in r]  # 1-based labels
    label_x = np.repeat(0.1, rank_len)

    return ColumnDataSource({'rank': r, 'x': x, 'y': y, 'color': g,
                             'label_x': label_x, 'label_y': y, 'names': names,
                             'e0': re0, 'e1': re1, 'scores': scores})
    

In [19]:
# Signed colour value per position: +(index + 20) for label 1 and
# -(index + 20) for label 0, so the sign encodes the group and the
# magnitude grows with position.
group_colors = [
    (pos + 20) if label == 1 else -(pos + 20)
    for pos, label in enumerate(groups)
]
r_source = get_rank_source(0)

# Sample matplotlib's PiYG colormap at 256 steps and convert each row to a
# hex string for Bokeh.
m_rgb = (255 * cm.PiYG(range(256))).astype('int')
PiYG_palette = []
for channels in m_rgb:
    PiYG_palette.append(RGB(*tuple(channels)).to_hex())

# Linear mapping from the signed colour values onto that palette.
mapper = LinearColorMapper(
    low=np.min(group_colors),
    high=np.max(group_colors),
    palette=PiYG_palette,
)

In [72]:
# Preview of the ranking bar chart: one horizontal bar per row of r_source,
# bar length taken from 'scores' and fill colour from 'color' via `mapper`.
hm = figure(
    title=" ",
    y_range=(20, 0),  # start > end, so the y axis runs top-down
    plot_width=400,
    plot_height=800,
    tools="ypan",
    toolbar_location=None,
)
hm.xgrid.visible = False
hm.yaxis.visible = False
hm.xaxis.visible = False

hm.hbar(
    y='y', left=0, right='scores', height=1,
    source=r_source,
    line_color="white",
    fill_color={"field": 'color', "transform": mapper},
)

# Text labels placed from the label_x / label_y columns.
labels = LabelSet(source=r_source, x='label_x', y='label_y',
                  text='names', text_color='black')
hm.add_layout(labels)

show(hm)

In [20]:
# Wedge angles (radians) for the group pie chart: 'Group A' gets the share
# of positions not counted in `m`, 'Group B' gets the `m` share.
share_a = (rank_len - m) / rank_len
share_b = m / rank_len
group_sizes = ColumnDataSource(data={
    'names': ['Group A', 'Group B'],
    'angles': [share_a * 2 * np.pi, share_b * 2 * np.pi],
    'colors': [green_group, pink_group],
})
group_sizes.data['angles']

[4.39822971502571, 1.8849555921538759]

In [24]:
# --- Main ranking view -------------------------------------------------
# One horizontal bar per row of r_source; y_range=(10, 0) starts the view on
# the first rows with the axis running top-down. Only vertical panning is on.
hm = figure(title=" ", y_range=(10, 0), x_range=(0, 1.5), plot_width=600, plot_height=600, tools="ypan", toolbar_location=None)
hm.xaxis.visible = False
# The y axis stays visible in this view.
hm.xgrid.visible = False

# Bar length comes from 'scores', fill colour from 'color' via `mapper`.
hm.hbar(source= r_source,  
        fill_color={"field":'color', "transform":mapper}, 
        line_color="white", y='y', left=0, right='scores', height=1)

# Text labels placed from the label_x / label_y columns.
labels = LabelSet(x='label_x', y='label_y', text='names', text_color='black',
         source=r_source)

hm.add_layout(labels)

# --- Overview strip ----------------------------------------------------
# Plots the e0 / e1 series against position 'y'.
select = figure(title="Group advantage",
                plot_height=200, plot_width=600, x_range=(0, 40), toolbar_location=None, background_fill_color="#efefef")

# The range tool is bound to hm's y range, so its overlay in `select`
# marks (and drives) the rows currently shown in `hm`.
range_tool = RangeTool(x_range=hm.y_range)
range_tool.overlay.fill_color = "grey"
range_tool.overlay.fill_alpha = 0.3

# Filled areas from 0 up to each series: e1 in green, e0 in pink.
select.varea(x='y', y1=0, y2='e1', source=r_source, alpha=0.5, color=green_group )
select.varea(x='y', y1=0, y2='e0', source=r_source, alpha=0.5, color=pink_group)
select.ygrid.grid_line_color = None
select.add_tools(range_tool)
select.toolbar.active_multi = range_tool

# --- Group distribution pie ---------------------------------------------
# Wedge extents are the running sum of the 'angles' column of group_sizes.
pie = figure(plot_height=200, plot_width=400, title="Groups Distribution", toolbar_location=None, x_range=(-0.5, 1.0))
pie.wedge(x=0, y=1, radius=0.2, 
        start_angle=cumsum('angles', include_zero=True), end_angle=cumsum('angles'),
        line_color="white", fill_color='colors', legend_field='names', source=group_sizes)

# Hide axes and grid; they carry no meaning for a pie chart.
pie.axis.axis_label=None
pie.axis.visible=False
pie.grid.grid_line_color = None

# Write the combined layout to a standalone HTML file and display it.
output_file("range_error_viewer.html", title="Rank Fairness Range View")
show(row(hm, column(pie, select)))
# make a grid
# grid = gridplot([[hm, None, pie], [None, None, select]])

# show the results
# show(grid)
