In [1]:
%matplotlib inline
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import itertools
from collections import defaultdict, Counter

In [2]:
# Worked example taken from Michael Hay's paper:
# https://cs.colgate.edu/~mhay/assets/publications/hay2017differentially.pdf
# Each key is a voter id; each value is that voter's ordering of the five
# candidates 'a'..'e', written compactly as a string and expanded to a list.
VOTES = {
    "V1": list("eacbd"),
    "V2": list("aedcb"),
    "V3": list("cbade"),
    "V4": list("edcba"),
    "V5": list("badec"),
    "V6": list("cedab"),
    "V7": list("cbeda"),
    "V8": list("edcba"),
}

In [3]:
# One column per voter; row i holds the candidate that voter listed at position i.
votes = pd.DataFrame(data=VOTES)
# The candidate labels, read off the first voter's ballot.
options = list(votes['V1'])
votes

Unnamed: 0,V1,V2,V3,V4,V5,V6,V7,V8
0,e,a,c,e,b,c,c,e
1,a,e,b,d,a,e,b,d
2,c,d,a,c,d,d,e,c
3,b,c,d,b,e,a,d,b
4,d,b,e,a,c,b,a,a


In [4]:
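# Rank-aggregation methods to compare:
# 1. BORDA      - order candidates by their summed rank positions
# 2. QUICK SORT
# 3. OPT        - exhaustive search for the ranking with the lowest average Kendall tau score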

In [5]:
def borda_ranking(votes, options):
    '''Order candidates by the sum of the positions they occupy across ballots.

    votes   : DataFrame with one column per voter; the i-th value in a column
              contributes i (0-based) to that candidate's total.
    options : not used by the computation; kept so the signature matches the
              other aggregation functions in this notebook.

    Returns a Series indexed by candidate holding the position totals,
    sorted ascending (smallest total first).
    '''
    position_totals = defaultdict(int)
    for _voter, ballot in votes.items():
        for position, candidate in enumerate(ballot):
            position_totals[candidate] += position

    totals = pd.Series(position_totals)
    return totals.sort_values(ascending=True)

In [6]:
# Borda totals for the example ballots; the smallest total is listed first.
borda_ranking(votes, options)

e    11
c    13
d    18
a    19
b    19
dtype: int64

In [7]:
def kendallTau(lstA, lstB):
    '''Normalised Kendall tau distance between two rankings of the same items.

    Every unordered pair of items drawn from ``lstA`` is examined. A pair is
    discordant when the two rankings place its members in opposite relative
    order. The number of discordant pairs is divided by the total number of
    pairs, n * (n - 1) / 2 with n = len(lstA).

    Parameters
    ----------
    lstA : sized iterable of hashable items
        First ranking; an item's index in the sequence is its rank.
    lstB : iterable of hashable items
        Second ranking; must contain every item of ``lstA``.

    Returns
    -------
    float
        Fraction of discordant pairs: 0.0 when both rankings agree on every
        pair, 1.0 when one is the exact reverse of the other. Rankings with
        fewer than two items have no pairs to disagree on, so 0.0 is returned
        instead of dividing by zero.

    Raises
    ------
    KeyError
        If an item of ``lstA`` does not appear in ``lstB``.
    '''
    n = len(lstA)
    if n < 2:
        # No pairs exist, so the pair count below would be zero.
        return 0.0

    ranksA = {item: idx for idx, item in enumerate(lstA)}
    ranksB = {item: idx for idx, item in enumerate(lstB)}

    discordant = 0
    for x, y in itertools.combinations(lstA, 2):
        # The rank differences have opposite signs exactly when the two
        # rankings order x and y differently.
        if (ranksA[x] - ranksA[y]) * (ranksB[x] - ranksB[y]) < 0:
            discordant += 1

    return discordant / (n * (n - 1) / 2.0)


In [8]:
# EXAMPLE USAGE
# Compare two independently shuffled copies of the candidate list.

# Seed NumPy's global RNG so both shuffles, and therefore the distance this
# cell displays, are identical on every Restart & Run All.
np.random.seed(42)

lstA = options[:]
lstB = options[:]

np.random.shuffle(lstA)
np.random.shuffle(lstB)

kendallTau(lstA, lstB)

0.4

In [9]:
def get_kendal_score_for_ranking(ranking, votes):
    '''Mean kendallTau value between ``ranking`` and every ballot in ``votes``.

    ranking : candidate ordering to evaluate.
    votes   : DataFrame with one column per voter.

    Returns the sum of kendallTau(ranking, ballot) over all ballots divided
    by the number of columns in ``votes``.
    '''
    n_voters = votes.shape[1]
    total = sum(
        kendallTau(ranking, ballot)
        for _voter, ballot in votes.T.iterrows()
    )
    return total / n_voters

# Score the ordering e, c, d, a, b (the order of the Borda output shown earlier).
get_kendal_score_for_ranking(['e', 'c', 'd', 'a', 'b'], votes)

0.4

In [10]:
# Brute force: score every permutation of the options and keep the best one.
def get_optimal_kemeny_young_rank(votes, options):
    '''Return the ordering of ``options`` with the lowest average Kendall tau score.

    Each permutation of ``options`` is scored against ``votes`` with
    ``get_kendal_score_for_ranking``. Permutations are consumed lazily and
    only the best one seen so far is kept, so memory use stays constant
    instead of holding all n! rankings and their scores at once. The running
    time is still factorial in ``len(options)``.

    Parameters
    ----------
    votes : DataFrame with one column per voter.
    options : iterable of candidates to permute.

    Returns
    -------
    list
        The first permutation, in ``itertools.permutations`` order, that
        attains the minimum score.

    Side effects
    ------------
    Prints the minimum score.
    '''
    best_score = None
    best_ranking = None
    for ranking in itertools.permutations(options):
        score = get_kendal_score_for_ranking(ranking, votes)
        # Strict '<' keeps the earliest permutation when several tie.
        if best_score is None or score < best_score:
            best_score, best_ranking = score, ranking

    print("optimal score is ", best_score)
    return list(best_ranking)


In [11]:
# Exhaustive search over every ordering of the five candidates (5! = 120 permutations).
get_optimal_kemeny_young_rank(votes, options)

optimal score is  0.37499999999999994


['e', 'c', 'b', 'a', 'd']