In [14]:
import math                      # for sqrt and other functions
import numpy as np               # for linear algebra
import pandas as pd              # for tabular output
from scipy.stats import rankdata # for ranking the candidates

# Step 0 - Obtaining and Preprocessing Data

In [2]:
# Criteria table: each row supplies a criterion 'Name', its importance
# 'Ranking' and, in 'Ideally', whether a higher value is preferable.
# NOTE(review): the file is named bowling_criteria.csv although it is used
# together with batsmen.csv further down -- confirm this is the intended file.
criteria_csv = '../data/bowling_criteria.csv'
attributes_data = pd.read_csv(criteria_csv)
attributes_data

Unnamed: 0,Name,Ranking,Ideally
0,SR,1,Higher
1,Avg,2,Higher
2,Runs,3,Higher
3,Inn,4,Higher
4,NO,5,Higher
5,6s,6,Higher
6,4s,7,Higher
7,100s,8,Higher
8,50s,9,Higher
9,Mat,10,Higher


In [3]:
# Unpack the criteria table into the structures used by the rest of the notebook.
attributes = attributes_data['Name'].tolist()               # criterion names, in table order
ranks = attributes_data['Ranking'].to_numpy(dtype=float)    # importance rank of each criterion
n = len(attributes)                                         # number of criteria

# 0-based positions (in table order) of the criteria for which a higher value
# is better. Positions are stored rather than DataFrame index labels because
# the set is later compared against column numbers of the data matrix; the two
# only coincide when the table has a default 0..n-1 index.
benefit_attributes = {
    position
    for position, direction in enumerate(attributes_data['Ideally'])
    if direction == 'Higher'
}

In [4]:
# Rank-sum weighting: a criterion ranked r receives 2 * (n + 1 - r) / (n * (n + 1)),
# so the weight shrinks linearly as the rank number grows. When the ranks are
# exactly 1..n these weights add up to 1.
rank_sum_denominator = n * (n + 1)
weights = 2 * (n + 1 - ranks) / rank_sum_denominator
pd.DataFrame({'Weight': weights}, index=attributes)

Unnamed: 0,Weight
SR,0.153846
Avg,0.141026
Runs,0.128205
Inn,0.115385
NO,0.102564
6s,0.089744
4s,0.076923
100s,0.064103
50s,0.051282
Mat,0.038462


In [5]:
# Load the candidates and keep only the columns named in the criteria table.
original_dataframe = pd.read_csv('../data/batsmen.csv')
candidates = original_dataframe['Name'].to_numpy()

# Fail loudly when a criterion has no matching column; building the frame with
# `columns=attributes` would instead create an all-NaN column without any error.
missing_attributes = [name for name in attributes if name not in original_dataframe.columns]
if missing_attributes:
    raise KeyError(f"Criteria missing from ../data/batsmen.csv: {missing_attributes}")

# An explicit float copy: the normalization step divides this array in place,
# which needs a writable floating-point array that does not share memory with
# the DataFrame.
raw_data = original_dataframe[attributes].to_numpy(dtype=float, copy=True)

# m = number of candidates (rows), n = number of criteria (columns).
m, n = raw_data.shape

pd.DataFrame(data=raw_data, index=candidates, columns=attributes)

Unnamed: 0,SR,Avg,Runs,Inn,NO,6s,4s,100s,50s,Mat,HS,BF
AB de Villiers,154.0,44.2,442.0,13.0,3.0,26.0,31.0,0.0,5.0,13.0,82.0,287.0
Andre Russel,204.81,56.67,510.0,13.0,4.0,52.0,31.0,0.0,4.0,14.0,80.0,249.0
Ben Stokes,124.24,20.5,123.0,9.0,3.0,4.0,8.0,0.0,0.0,9.0,46.0,99.0
Chris Gayle,153.6,40.83,490.0,13.0,1.0,34.0,45.0,0.0,4.0,13.0,99.0,319.0
Chris Lynn,139.65,31.15,405.0,13.0,0.0,22.0,41.0,0.0,4.0,13.0,82.0,290.0
David Warner,143.86,69.2,692.0,12.0,2.0,21.0,57.0,1.0,8.0,12.0,100.0,481.0
Faf Du Plessis,123.36,36.0,396.0,12.0,1.0,15.0,36.0,0.0,3.0,12.0,96.0,321.0
Jonny Bairstow,157.24,55.63,445.0,10.0,2.0,18.0,48.0,1.0,2.0,10.0,114.0,283.0
Jos Buttler,151.7,38.88,311.0,8.0,0.0,14.0,38.0,0.0,3.0,8.0,89.0,205.0
Kane Williamson,120.0,22.29,156.0,9.0,2.0,5.0,12.0,0.0,1.0,9.0,70.0,130.0


# Step 1 - Normalizing the Ratings and Weights

$$
P_{ij} = \begin{cases}
\frac{x_{ij}}{\sum^{m}_{k=1} {x_{kj}}} & \text{if } j \in J_1\\
\frac{\frac{1}{x_{ij}}}{\sum^{m}_{k=1} \frac{1}{x_{kj}}} & \text{if } j \in J_2
\end{cases}
$$

$$
w_{rc} = \frac{w_c}{w_r}
$$

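Here $w_r = \max\left\{w_c \mid c = 1, 2, \ldots, n\right\}$ is the largest criterion weight (so the reference criterion gets $w_{rc} = 1$), $J_1$ is the set of benefit criteria (higher is better), $J_2$ is the set of cost criteria (lower is better), $i = 1, 2, \ldots, m$ indexes the candidates and $j = 1, 2, \ldots, n$ indexes the criteria.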

In [6]:
# Normalize every criterion column of raw_data in place so that it sums to 1.
# NOTE: raw_data is overwritten, so from here on it holds the normalized
# matrix P; re-running this cell operates on already-normalized values.
for col_idx in range(n):
    values = raw_data[:, col_idx]
    if col_idx not in benefit_attributes:
        # Cost criterion: smaller is better, so normalize the reciprocals.
        reciprocals = 1 / values
        raw_data[:, col_idx] = reciprocals / sum(reciprocals)
        continue
    # Benefit criterion: each entry becomes its share of the column total.
    raw_data[:, col_idx] /= sum(values)

pd.DataFrame(data=raw_data, index=candidates, columns=attributes)

Unnamed: 0,SR,Avg,Runs,Inn,NO,6s,4s,100s,50s,Mat,HS,BF
AB de Villiers,0.066773,0.070266,0.074587,0.068783,0.085714,0.084142,0.061024,0.0,0.106383,0.066667,0.062932,0.069728
Andre Russel,0.088803,0.09009,0.086061,0.068783,0.114286,0.168285,0.061024,0.0,0.085106,0.071795,0.061397,0.060496
Ben Stokes,0.053869,0.032589,0.020756,0.047619,0.085714,0.012945,0.015748,0.0,0.0,0.046154,0.035303,0.024052
Chris Gayle,0.066599,0.064908,0.082686,0.068783,0.028571,0.110032,0.088583,0.0,0.085106,0.066667,0.075979,0.077502
Chris Lynn,0.060551,0.04952,0.068343,0.068783,0.0,0.071197,0.080709,0.0,0.085106,0.066667,0.062932,0.070457
David Warner,0.062376,0.110009,0.116774,0.063492,0.057143,0.067961,0.112205,0.5,0.170213,0.061538,0.076746,0.116861
Faf Du Plessis,0.053488,0.05723,0.066824,0.063492,0.028571,0.048544,0.070866,0.0,0.06383,0.061538,0.073676,0.077988
Jonny Bairstow,0.068178,0.088436,0.075093,0.05291,0.057143,0.058252,0.094488,0.5,0.042553,0.051282,0.08749,0.068756
Jos Buttler,0.065775,0.061808,0.052481,0.042328,0.0,0.045307,0.074803,0.0,0.06383,0.041026,0.068304,0.049806
Kane Williamson,0.052031,0.035435,0.026325,0.047619,0.057143,0.016181,0.023622,0.0,0.021277,0.046154,0.053722,0.031584


In [7]:
# Express every weight relative to the largest one (w_rc = w_c / w_r), so the
# most important criterion ends up with a relative weight of exactly 1.
# The division is done in place on the existing `weights` array.
max_weight = max(weights)
np.divide(weights, max_weight, out=weights)

pd.DataFrame({'Weight': weights}, index=attributes)

Unnamed: 0,Weight
SR,1.0
Avg,0.916667
Runs,0.833333
Inn,0.75
NO,0.666667
6s,0.583333
4s,0.5
100s,0.416667
50s,0.333333
Mat,0.25


# Step 2 - Calculating Dominance Degrees

For the contribution of each criterion, we have:

$$
\Phi_{c}\left(A_i, A_j\right) = \begin{cases}
\sqrt{\frac{\left(P_{ic} - P_{jc}\right) w_{rc}}{\sum^{n}_{c=1} {w_{rc}}}} & \text{if } P_{ic} - P_{jc} > 0  \\
0 & \text{if } P_{ic} - P_{jc} = 0 \\
-\frac{1}{\theta}\sqrt{\frac{\left(\sum^{n}_{c=1} {w_{rc}}\right) \left(P_{jc} - P_{ic}\right)}{w_{rc}}} & \text{if } P_{ic} - P_{jc} < 0
\end{cases}
$$

Combining all contributions, we get the dominance degrees:

$$
\delta\left(A_i, A_j\right) = \sum^{n}_{c = 1} {\Phi_{c}\left(A_i, A_j\right)}
$$

Here $c = 1, 2, \ldots, n$ indexes the criteria, $i, j = 1, 2, \ldots, m$ index the candidates, and $\theta$ is the loss attenuation factor.

In [8]:
# The loss attenuation factor (theta in the Phi formula). It only enters the
# "loss" branch, where the contribution is multiplied by -1 / theta, so a
# larger theta makes losses count less. Must be non-zero.
theta = 1.0

In [9]:
# phi[c, i, j] is Phi_c(A_i, A_j): the contribution of criterion c to how
# strongly candidate i dominates candidate j. raw_data holds the normalized
# matrix P at this point and weights holds the relative weights w_rc.
weight_sum = sum(weights)

# differences[c, i, j] = P_ic - P_jc for every criterion and candidate pair,
# built with broadcasting instead of three nested Python loops.
per_criterion = raw_data.T                                   # shape (n, m)
differences = per_criterion[:, :, np.newaxis] - per_criterion[:, np.newaxis, :]
criterion_weights = np.asarray(weights)[:, np.newaxis, np.newaxis]

# Both branches are evaluated for every entry, so the differences are clipped
# at zero to keep sqrt away from negative inputs; np.where below discards the
# entries that belong to the other branch.
gains = np.sqrt(np.maximum(differences, 0.0) * criterion_weights / weight_sum)
losses = -1.0 / theta * np.sqrt(weight_sum * np.maximum(-differences, 0.0) / criterion_weights)

# Gain where candidate i is better, attenuated loss where it is worse, and 0
# where the two candidates are equal on this criterion.
phi = np.where(differences > 0, gains, np.where(differences < 0, losses, 0.0))

phi

array([[[ 0.        , -0.37841692,  0.04455527, ...,  0.03750774,
          0.04199652,  0.05034709],
        [ 0.05821799,  0.        ,  0.07331102, ...,  0.06925435,
          0.07178469,  0.07696859],
        [-0.28960924, -0.47652165,  0.        , ..., -0.15631674,
         -0.09673085,  0.02344478],
        ...,
        [-0.2438003 , -0.45015326,  0.02404873, ...,  0.        ,
          0.0188912 ,  0.0335857 ],
        [-0.27297739, -0.4666005 ,  0.01488167, ..., -0.12279278,
          0.        ,  0.02776908],
        [-0.3272561 , -0.50029584, -0.15239108, ..., -0.21830705,
         -0.18049903,  0.        ]],

       [[ 0.        , -0.37492556,  0.07289271, ...,  0.04474409,
          0.06827114,  0.03112087],
        [ 0.05287412,  0.        ,  0.09005009, ...,  0.06926547,
          0.08635173,  0.06135292],
        [-0.51687556, -0.63853702,  0.        , ..., -0.40803938,
         -0.18111663, -0.46740001],
        ...,
        [-0.31727624, -0.49115516,  0.05754401, ...,  

In [10]:
# Dominance degree delta[i, j]: the per-criterion contributions phi[c, i, j]
# summed over all criteria c (axis 0 of phi).
delta = phi.sum(axis=0)

pd.DataFrame(data=delta, index=candidates, columns=candidates)

Unnamed: 0,AB de Villiers,Andre Russel,Ben Stokes,Chris Gayle,Chris Lynn,David Warner,Faf Du Plessis,Jonny Bairstow,Jos Buttler,Kane Williamson,Kieron Pollard,Marcus Stoinis,Moeen Ali,Quinton de Kock,Shane Watson,Steve Smith
AB de Villiers,0.0,-2.863567,0.541653,-2.736826,-0.470107,-8.323082,-1.469548,-4.718239,-0.478668,0.563287,-1.747245,-0.811863,0.263601,-3.177333,-2.701426,0.402402
Andre Russel,-1.432863,0.0,0.680447,-2.185471,-1.227112,-8.352768,-1.751344,-4.861276,-0.40626,0.668864,-1.363234,-0.227213,0.585822,-3.176525,-2.930901,-0.184972
Ben Stokes,-8.63677,-9.667204,0.0,-9.146171,-8.182355,-13.270051,-7.802967,-10.830438,-6.242437,-3.047484,-7.635036,-4.509206,-5.279731,-9.286885,-8.464639,-6.335553
Chris Gayle,-1.464281,-2.969557,-0.175313,0.0,0.285547,-8.043642,0.118903,-4.553716,0.409858,0.014354,-1.79331,-1.094065,-0.283686,-2.311074,-0.825192,-0.1371
Chris Lynn,-2.701454,-3.741018,-0.417572,-3.823306,0.0,-9.426578,-1.984101,-5.766008,-0.686636,-0.283309,-2.544914,-1.401158,-0.67513,-4.20448,-2.472064,-0.754139
David Warner,-1.209961,-2.478412,0.286893,-0.915043,-0.167186,0.0,0.639631,-0.486275,0.574301,0.791753,-1.890787,-0.340003,0.43189,-0.987474,-0.655316,0.684438
Faf Du Plessis,-3.656001,-4.732884,-0.356786,-3.705243,-2.293111,-9.522781,0.0,-5.872369,-0.466721,-0.110308,-2.814779,-1.472636,-0.849702,-4.643568,-2.159345,-0.51911
Jonny Bairstow,-3.131,-4.366829,0.157937,-3.398367,-2.207302,-6.057709,-1.84139,0.0,-0.088623,0.652716,-2.297592,-0.51857,-0.049678,-4.253256,-2.720731,-0.66804
Jos Buttler,-5.721169,-6.457348,-1.096931,-6.463471,-4.292833,-11.468555,-4.064842,-8.271405,0.0,-0.965622,-3.323164,-2.234822,-1.985314,-6.235162,-4.644032,-2.984853
Kane Williamson,-8.286399,-8.948843,-0.484118,-8.413792,-7.306546,-12.686198,-7.081497,-10.064896,-5.310323,0.0,-6.188112,-3.995256,-3.472313,-8.463717,-7.646521,-5.268632


In [11]:
# Overall dominance of each candidate: its row of delta summed over every
# opponent j.
delta_sums = delta.sum(axis=1)
pd.DataFrame(data=delta_sums,index=candidates,columns=['Sum'])

Unnamed: 0,Sum
AB de Villiers,-27.726961
Andre Russel,-26.164804
Ben Stokes,-118.336927
Chris Gayle,-22.822275
Chris Lynn,-40.881867
David Warner,-5.721549
Faf Du Plessis,-43.175344
Jonny Bairstow,-30.788434
Jos Buttler,-70.209524
Kane Williamson,-103.617163


In [12]:
# Extremes of the summed dominance degrees; they anchor the 0..1 rescaling of
# the ratings in the next step.
delta_min, delta_max = min(delta_sums), max(delta_sums)
pd.DataFrame({'Value': [delta_min, delta_max]}, index=['Minimum', 'Maximum'])

Unnamed: 0,Value
Minimum,-118.336927
Maximum,-5.721549


In [13]:
# Min-max rescaling: the candidate with the smallest summed dominance gets 0
# and the one with the largest gets 1.
# NOTE: when every candidate has the same sum the span is 0 and the ratings
# come out as NaN.
rating_span = delta_max - delta_min
ratings = (delta_sums - delta_min) / rating_span
pd.DataFrame({'Rating': ratings}, index=candidates)

Unnamed: 0,Rating
AB de Villiers,0.804597
Andre Russel,0.818468
Ben Stokes,0.0
Chris Gayle,0.848149
Chris Lynn,0.687784
David Warner,1.0
Faf Du Plessis,0.667418
Jonny Bairstow,0.777412
Jos Buttler,0.427361
Kane Williamson,0.130708


In [15]:
def rank_according_to(data, names=None):
    """Return the candidate names ordered from highest to lowest score.

    Parameters
    ----------
    data : array-like of float
        One score per candidate, in the same order as ``names``.
    names : array-like, optional
        Candidate labels. Defaults to the notebook-level ``candidates`` array.

    Returns
    -------
    numpy.ndarray
        The labels sorted by descending score. Every label appears exactly
        once, including when scores are tied.
    """
    if names is None:
        names = candidates
    names = np.asarray(names)

    # 'ordinal' gives each entry a distinct integer rank 1..m even for tied
    # scores. The default 'average' method yields fractional ranks for ties,
    # which truncate to the same slot and make one name overwrite another.
    positions = rankdata(data, method='ordinal').astype(int) - 1

    ordered = np.empty_like(names)
    ordered[positions] = names   # ascending by score
    return ordered[::-1]         # best first

In [16]:
# Order the candidates from best to worst rating and show them against their
# 1-based finishing position.
result = rank_according_to(ratings)
pd.DataFrame({'Name': result}, index=range(1, m + 1))

Unnamed: 0,Name
1,David Warner
2,Chris Gayle
3,Quinton de Kock
4,Andre Russel
5,AB de Villiers
6,Jonny Bairstow
7,Shane Watson
8,Chris Lynn
9,Faf Du Plessis
10,Kieron Pollard


In [17]:
# Plain-text summary of the ranking: the winner first, then the full order.
# NOTE(review): the message says "according to C*", while this notebook ranks
# by the normalized dominance rating -- confirm the wording is intended.
best_candidate = result[0]
print("The best candidate/alternative according to C* is " + best_candidate)

preference_order = ", ".join(result)
print("The preferences in descending order are " + preference_order + ".")

The best candidate/alternative according to C* is David Warner
The preferences in descending order are David Warner, Chris Gayle, Quinton de Kock, Andre Russel, AB de Villiers, Jonny Bairstow, Shane Watson, Chris Lynn, Faf Du Plessis, Kieron Pollard, Steve Smith, Jos Buttler, Moeen Ali, Marcus Stoinis, Kane Williamson, Ben Stokes.
