# Kendall Tau Distance

In [1]:
from scipy.stats import kendalltau
import numpy as np
import wiggum as wg

In [2]:
# Two rankings of the same pair of labels, in opposite order.
subgroup, aggregate = ['A', 'B'], ['B', 'A']

In [3]:
def categorical_kendalltau(list1,list2):
    """Kendall's tau between two orderings of categorical labels.

    Every label is encoded by its position in ``list1`` (if a label occurs
    more than once, its last position is used) and both encoded lists are
    handed to ``scipy.stats.kendalltau``.

    Parameters
    ----------
    list1 : sequence of hashable
        Reference ordering; defines the label -> integer encoding.
    list2 : sequence of hashable
        Ordering to compare; each label must also occur in ``list1``.

    Returns
    -------
    The ``kendalltau`` result, which unpacks as ``tau, p_value``.

    Raises
    ------
    KeyError
        If ``list2`` contains a label that is not present in ``list1``.
    """
    position = {}
    for index, label in enumerate(list1):
        position[label] = index

    encoded1 = list(map(position.__getitem__, list1))
    encoded2 = list(map(position.__getitem__, list2))
    return kendalltau(encoded1, encoded2)

In [4]:
# Compare the two opposite two-item rankings; only the coefficient is displayed.
tau, p = categorical_kendalltau(list1=aggregate, list2=subgroup)
tau

  size * (size - 1) * (size - 2))


-1.0

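Kendall's $\tau$ varies from -1 (exact opposite) to 1 (perfect match). To use it as a distance, we first rescale it to $[0, 1]$ (exact opposite becomes 0, perfect match becomes 1) and then flip it, so that a perfect match has distance 0 and an exact opposite has distance 1.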

In [5]:
def scale(t):
    """Map a value from [-1, 1] linearly onto [0, 1]."""
    return (t + 1) / 2


def flip(t):
    """Return the complement ``1 - t`` (turns a [0, 1] similarity into a distance)."""
    return 1 - t

In [9]:
# The subgroup ranking has the last two labels swapped relative to the aggregate.
aggregate, subgroup = ['B', 'H', 'W'], ['B', 'W', 'H']
tau, p = categorical_kendalltau(list1=aggregate, list2=subgroup)
# raw tau, tau rescaled to [0, 1], and the flipped (distance) value
print(tau, scale(tau), flip(scale(tau)), sep=' ')

0.33333333333333337 0.6666666666666667 0.33333333333333326


In [7]:
# The aggregate ranking has the last two labels swapped relative to the subgroup.
aggregate, subgroup = ['A', 'C', 'B'], ['A', 'B', 'C']
tau, p = categorical_kendalltau(list1=aggregate, list2=subgroup)
# raw tau, tau rescaled to [0, 1], and the flipped (distance) value
print(tau, scale(tau), flip(scale(tau)), sep=' ')

0.33333333333333337 0.6666666666666667 0.33333333333333326


In [8]:
# The aggregate ranking is the subgroup ranking fully reversed.
aggregate, subgroup = ['C', 'B', 'A'], ['A', 'B', 'C']
tau, p = categorical_kendalltau(list1=aggregate, list2=subgroup)
# raw tau, tau rescaled to [0, 1], and the flipped (distance) value
print(tau, scale(tau), flip(scale(tau)), sep=' ')

-1.0 0.0 1.0


In [26]:
# Both rankings are identical.
aggregate, subgroup = ['A', 'B', 'C'], ['A', 'B', 'C']
tau, p = categorical_kendalltau(list1=aggregate, list2=subgroup)
# raw tau, tau rescaled to [0, 1], and the flipped (distance) value
print(tau, scale(tau), flip(scale(tau)), sep=' ')

1.0 1.0 0.0


# Kendall Tau for rank strength

We also use Kendall's $\tau$ to compute the strength of a ranking. To demonstrate this, we'll load a dataset.

In [31]:
# Load the example CSV (given as a relative path) into a wiggum LabeledDataFrame.
labeled_df = wg.LabeledDataFrame('../wiggum_app/static/data/rateSPdataDeptRace.csv')

In [33]:
# Preview the first rows of the wrapped data frame.
labeled_df.df.head()

variable,department,gender,decision,race
0,3,M,1,W
1,3,M,1,H
2,0,F,0,W
3,1,F,0,H
4,3,M,1,W


In [36]:
# Let wiggum infer a variable type for every column, then display the metadata table.
labeled_df.infer_var_types()
labeled_df.meta_df

Unnamed: 0_level_0,dtype,var_type,role,isCount,weighting_var
variable,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
department,int64,ordinal,,,
gender,object,binary,,,
decision,int64,binary,,,
race,object,categorical,,,


In [37]:
# Role(s) of each column: the three grouping columns are used both to split
# the data ('groupby') and as trend variables; 'decision' is a trend variable only.
roles = {
    'department': ['groupby', 'Trend'],
    'gender': ['groupby', 'Trend'],
    'decision': 'Trend',
    'race': ['groupby', 'Trend'],
}
# Override the inferred types of these two columns so they are treated as categorical.
var_types = dict(gender='categorical', department='categorical')

labeled_df.set_roles(roles)
labeled_df.set_var_types(var_types)
labeled_df.meta_df

Unnamed: 0_level_0,dtype,var_type,role,isCount,weighting_var
variable,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
department,int64,categorical,"[groupby, Trend]",,
gender,object,categorical,"[groupby, Trend]",,
decision,int64,binary,Trend,,
race,object,categorical,"[groupby, Trend]",,


In [39]:
# Persist the labeled dataset under the given path.
# NOTE(review): presumably writes both the data and the metadata as CSV files -- confirm against the wiggum docs.
labeled_df.to_csvs('../data/rateSPdataDeptRace')

In [75]:
# Column whose categories get ranked, and the column whose mean ranks them.
rankfeat, statfeat = 'race', 'decision'

In [76]:
# Mean of the statistic within each category, in ascending order.
stat_df = (
    labeled_df.df
    .groupby(rankfeat)[statfeat]
    .mean()
    .sort_values()
)
stat_df

race
B    0.230000
H    0.252174
W    0.253503
Name: decision, dtype: float64

In [77]:
# Category labels in ascending order of their group mean (the index of the sorted `stat_df`).
ordered_rank_feat = stat_df.index.values
ordered_rank_feat

array(['B', 'H', 'W'], dtype=object)

In [78]:
# Category of every row, with rows ordered by their individual statistic value.
actual_order = labeled_df.df.sort_values(by=statfeat).loc[:, rankfeat]
actual_order.values

array(['H', 'W', 'W', 'W', 'W', 'W', 'W', 'W', 'W', 'W', 'W', 'W', 'W',
       'W', 'W', 'W', 'H', 'W', 'W', 'W', 'W', 'W', 'H', 'W', 'W', 'W',
       'H', 'W', 'W', 'W', 'W', 'W', 'W', 'W', 'W', 'B', 'W', 'W', 'W',
       'W', 'W', 'H', 'W', 'W', 'W', 'W', 'W', 'W', 'B', 'H', 'W', 'W',
       'W', 'W', 'W', 'W', 'H', 'W', 'W', 'W', 'W', 'W', 'W', 'W', 'W',
       'W', 'W', 'W', 'W', 'W', 'B', 'B', 'W', 'H', 'B', 'W', 'W', 'W',
       'W', 'W', 'W', 'W', 'H', 'W', 'W', 'W', 'W', 'B', 'W', 'W', 'W',
       'W', 'W', 'W', 'B', 'W', 'W', 'W', 'W', 'B', 'W', 'W', 'W', 'B',
       'W', 'W', 'W', 'W', 'B', 'W', 'W', 'B', 'W', 'B', 'B', 'W', 'B',
       'W', 'W', 'H', 'W', 'W', 'W', 'W', 'W', 'W', 'W', 'W', 'W', 'W',
       'W', 'W', 'W', 'B', 'B', 'H', 'W', 'B', 'W', 'W', 'W', 'W', 'W',
       'W', 'B', 'W', 'W', 'W', 'W', 'W', 'W', 'W', 'B', 'W', 'W', 'W',
       'B', 'H', 'W', 'W', 'W', 'W', 'W', 'W', 'W', 'B', 'B', 'H', 'W',
       'W', 'W', 'W', 'B', 'W', 'W', 'W', 'W', 'W', 'W', 'W', 'B

If the data is weighted, we do some additional replications; if the amount of weighting is very large, we use an approximation.

In [79]:
# Number of rows per category ...
counts = labeled_df.df.groupby([rankfeat])[statfeat].count()
# ... listed in the same order as the aggregate ranking ...
rep_counts = list(map(int, (counts[label] for label in ordered_rank_feat)))
# ... so that each category is repeated once per row, in ranking order.
Trend_order = np.repeat(ordered_rank_feat, repeats=rep_counts)
Trend_order

array(['B', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B',
       'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B',
       'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B',
       'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B',
       'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B',
       'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B',
       'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B',
       'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'H', 'H', 'H', 'H',
       'H', 'H', 'H', 'H', 'H', 'H', 'H', 'H', 'H', 'H', 'H', 'H', 'H',
       'H', 'H', 'H', 'H', 'H', 'H', 'H', 'H', 'H', 'H', 'H', 'H', 'H',
       'H', 'H', 'H', 'H', 'H', 'H', 'H', 'H', 'H', 'H', 'H', 'H', 'H',
       'H', 'H', 'H', 'H', 'H', 'H', 'H', 'H', 'H', 'H', 'H', 'H', 'H',
       'H', 'H', 'H', 'H', 'H', 'H', 'H', 'H', 'H', 'H', 'H', 'H', 'H',
       'H', 'H', 'H', 'H', 'H', 'H', 'H', 'H', 'H', 'H', 'H', 'H

In [80]:
# Map the (possibly string) category labels to numbers.
# Each category is coded by its rank in the aggregate trend order
# (0 = lowest group mean).  Building the map from `actual_order` would instead
# code every category by the row position of its *last* occurrence, so the
# relative order of the codes -- the only thing Kendall's tau uses -- would
# depend on how rows with tied statistics happen to be sorted.
numeric_map = {a:i for i,a in enumerate(ordered_rank_feat)}
num_acutal = [numeric_map[a] for a in actual_order]
num_Trend = [numeric_map[b] for b in Trend_order]
# compute and round; the sign is dropped, only the strength is reported
tau,p = kendalltau(num_Trend,num_acutal)
tau_qual = np.abs(np.round(tau,4))
tau_qual

0.022

To get intuition, we'll make a plot.

In [None]:
# Inspect the integer codes of the trend order.
# This cell has no execution count, i.e. it was not run in the saved notebook.
num_Trend

In [84]:
# Fixed test score assigned to each gender.
test_score = {'F': 70, 'M': 90}


def grade_exact(row):
    """Return the fixed test score for the row's gender."""
    return test_score[row['gender']]


labeled_df.df['test'] = labeled_df.df.apply(grade_exact, axis=1)
labeled_df.df.head()

variable,department,gender,decision,race,test
0,3,M,1,W,90
1,3,M,1,H,90
2,0,F,0,W,70
3,1,F,0,H,70
4,3,M,1,W,90


In [85]:
rankfeat = 'gender'
statfeat = 'test'
# per-group mean in ascending order: this is the aggregate ranking
stat_df = labeled_df.df.groupby(rankfeat)[statfeat].mean().sort_values()
ordered_rank_feat = stat_df.index.values
# group label of every row, rows ordered by the row-level statistic
actual_order = labeled_df.df.sort_values(statfeat)[rankfeat]

# idealized order: each group repeated once per row, in ranking order
counts = labeled_df.df.groupby([rankfeat])[statfeat].count()
rep_counts = [int(counts[ov]) for ov in ordered_rank_feat]
Trend_order = np.repeat(ordered_rank_feat,rep_counts)
# Code each category by its rank in the aggregate order.  A map built from
# `actual_order` (one entry per row) would code each category by the row
# position of its last occurrence, which is not a meaningful ordering.
numeric_map = {a:i for i,a in enumerate(ordered_rank_feat)}
num_acutal = [numeric_map[a] for a in actual_order]
num_Trend = [numeric_map[b] for b in Trend_order]
# compute and round; the sign is dropped, only the strength is reported
tau,p = kendalltau(num_Trend,num_acutal)
tau_qual = np.abs(np.round(tau,4))
tau_qual

1.0

In [101]:
# Dedicated, seeded generator so the noisy scores (and every number derived
# from them) are identical on each Restart & Run All.
noise_rng = np.random.RandomState(101)


def grade_noisy(row):
    """Return the gender's base test score plus Gaussian noise (std 5)."""
    return test_score[row['gender']] + 5*noise_rng.normal()


labeled_df.df['noisy_test'] = labeled_df.df.apply(grade_noisy,axis=1)
labeled_df.df.head()

variable,department,gender,decision,race,test,noisy_test
0,3,M,1,W,90,96.706467
1,3,M,1,H,90,92.847975
2,0,F,0,W,70,73.040397
3,1,F,0,H,70,69.754128
4,3,M,1,W,90,98.921457


In [102]:
rankfeat = 'gender'
statfeat = 'noisy_test'
# per-group mean in ascending order: this is the aggregate ranking
stat_df = labeled_df.df.groupby(rankfeat)[statfeat].mean().sort_values()
ordered_rank_feat = stat_df.index.values
# group label of every row, rows ordered by the row-level statistic
actual_order = labeled_df.df.sort_values(statfeat)[rankfeat]

# idealized order: each group repeated once per row, in ranking order
counts = labeled_df.df.groupby([rankfeat])[statfeat].count()
rep_counts = [int(counts[ov]) for ov in ordered_rank_feat]
Trend_order = np.repeat(ordered_rank_feat,rep_counts)
# Code each category by its rank in the aggregate order.  A map built from
# `actual_order` (one entry per row) would code each category by the row
# position of its last occurrence, which is not a meaningful ordering.
numeric_map = {a:i for i,a in enumerate(ordered_rank_feat)}
num_acutal = [numeric_map[a] for a in actual_order]
num_Trend = [numeric_map[b] for b in Trend_order]
# compute and round; the sign is dropped, only the strength is reported
tau,p = kendalltau(num_Trend,num_acutal)
tau_qual = np.abs(np.round(tau,4))
tau_qual

0.9628

In [103]:
# Per-group mean of `statfeat`, in ascending order.
stat_df

gender
F    69.770914
M    90.478628
Name: noisy_test, dtype: float64

In [108]:

rankfeat = 'gender'

# Seeded generator local to this experiment so the printed means and tau
# scores are reproducible across runs.
spread_rng = np.random.RandomState(108)

for s in [5,10,15,20]:
    # same base score per gender, plus Gaussian noise with standard deviation s
    grade_noisy = lambda row: test_score[row['gender']] + s*spread_rng.normal()
    statfeat = 'noisy_test'+str(s)
    labeled_df.df[statfeat] = labeled_df.df.apply(grade_noisy,axis=1)

    # per-group mean in ascending order: this is the aggregate ranking
    stat_df = labeled_df.df.groupby(rankfeat)[statfeat].mean().sort_values()
    print(stat_df)
    ordered_rank_feat = stat_df.index.values
    # group label of every row, rows ordered by the row-level statistic
    actual_order = labeled_df.df.sort_values(statfeat)[rankfeat]

    # idealized order: each group repeated once per row, in ranking order
    counts = labeled_df.df.groupby([rankfeat])[statfeat].count()
    rep_counts = [int(counts[ov]) for ov in ordered_rank_feat]
    Trend_order = np.repeat(ordered_rank_feat,rep_counts)
    # Code each category by its rank in the aggregate order.  A map built from
    # `actual_order` (one entry per row) would code each category by the row
    # position of its last occurrence, which is not a meaningful ordering.
    numeric_map = {a:i for i,a in enumerate(ordered_rank_feat)}
    num_acutal = [numeric_map[a] for a in actual_order]
    num_Trend = [numeric_map[b] for b in Trend_order]
    # compute and round; the sign is dropped, only the strength is reported
    tau,p = kendalltau(num_Trend,num_acutal)
    tau_qual = np.abs(np.round(tau,4))
    print('spread',s)
    print('tau score',tau_qual)

gender
F    70.250329
M    90.094069
Name: noisy_test5, dtype: float64
spread 5
tau score 0.9669
gender
F    69.846963
M    89.828831
Name: noisy_test10, dtype: float64
spread 10
tau score 0.6649
gender
F    68.155556
M    90.210926
Name: noisy_test15, dtype: float64
spread 15
tau score 0.5284
gender
F    69.519778
M    90.023689
Name: noisy_test20, dtype: float64
spread 20
tau score 0.3877


As the variance increases, the test scores of the two groups become more mixed and the tau score goes down, even though the group means stay the same.

In [116]:
# Per-gender admission probability.  It has its own name because other cells
# unpack `tau, p = kendalltau(...)`, which rebinds `p` to a p-value; sharing
# the name `p` would make `biased_admit` fail if it were applied after that.
admit_prob = {'M':.8,'F':.1}
# Seeded generator so the simulated decisions are reproducible across runs.
admit_rng = np.random.RandomState(116)


def biased_admit(row):
    """Draw a 0/1 decision, returning 1 with the row's gender-specific probability."""
    prob = admit_prob[row['gender']]
    return admit_rng.choice([1,0],p=[prob,1-prob])


labeled_df.df['biased_admit'] = labeled_df.df.apply(biased_admit,axis=1)
labeled_df.df.head()

variable,department,gender,decision,race,test,noisy_test,noisy_test5,noisy_test10,noisy_test15,noisy_test20,biased_admit
0,3,M,1,W,90,84.941389,89.093489,94.735379,86.872916,79.389392,1
1,3,M,1,H,90,85.774722,77.539657,93.598838,83.260359,106.085155,1
2,0,F,0,W,70,74.622318,79.535317,72.011845,49.304987,101.351116,0
3,1,F,0,H,70,87.202168,70.315282,66.525876,58.040736,62.434925,0
4,3,M,1,W,90,77.783862,91.411126,81.714048,89.374329,106.586728,1


In [117]:
rankfeat = 'gender'
statfeat = 'biased_admit'
# per-group mean in ascending order: this is the aggregate ranking
stat_df = labeled_df.df.groupby(rankfeat)[statfeat].mean().sort_values()
ordered_rank_feat = stat_df.index.values
# group label of every row, rows ordered by the row-level statistic
actual_order = labeled_df.df.sort_values(statfeat)[rankfeat]

# idealized order: each group repeated once per row, in ranking order
counts = labeled_df.df.groupby([rankfeat])[statfeat].count()
rep_counts = [int(counts[ov]) for ov in ordered_rank_feat]
Trend_order = np.repeat(ordered_rank_feat,rep_counts)
# Code each category by its rank in the aggregate order.  A map built from
# `actual_order` (one entry per row) would code each category by the row
# position of its last occurrence, which is not a meaningful ordering.
numeric_map = {a:i for i,a in enumerate(ordered_rank_feat)}
num_acutal = [numeric_map[a] for a in actual_order]
num_Trend = [numeric_map[b] for b in Trend_order]
# compute and round; the sign is dropped, only the strength is reported
tau,p = kendalltau(num_Trend,num_acutal)
tau_qual = np.abs(np.round(tau,4))
tau_qual

0.6401

In [118]:
# Per-group mean of `statfeat`, in ascending order.
stat_df

gender
F    0.088020
M    0.825719
Name: biased_admit, dtype: float64

We can get high scores even for binary decisions if the disparity between groups is very large, but mostly these scores will not be very high, and they will vary with the number of rows.