# TOPSIS Ranking for Batsmen

In [1]:
import numpy as np               # for linear algebra
import pandas as pd              # for tabular output
from scipy.stats import rankdata # for ranking the candidates

In [2]:
attributes_data = pd.read_csv('data/bowling_criteria.csv')
attributes_data

Unnamed: 0,Name,Ranking,Ideally
0,SR,1,Lower
1,Econ,2,Lower
2,Avg,3,Lower
3,Wkts,4,Higher
4,Runs,5,Lower
5,Inns,6,Higher
6,TBB,7,Higher
7,5w,8,Higher
8,4w,9,Higher
9,BBI,10,Higher


In [5]:
benefit_attributes = set()
attributes = []
ranks = []
n = 0

for _, row in attributes_data.iterrows():
    name = row['Name']
    if name == 'BBI':
        continue
    attributes.append(name)
    ranks.append(float(row['Ranking']))
        
    if row['Ideally'] == 'Higher':
        benefit_attributes.add(n)
    
    n += 1

ranks = np.array(ranks)
benefit_attributes

{3, 5, 6, 7, 8, 9}

In [4]:
weights = 2 * (n + 1 - ranks) / (n * (n + 1))
weights

array([0.18181818, 0.16363636, 0.14545455, 0.12727273, 0.10909091,
       0.09090909, 0.07272727, 0.05454545, 0.03636364, 0.        ])

In [6]:
original_dataframe = pd.read_csv('data/bowlers.csv')
candidates = original_dataframe['Name'].to_numpy()
raw_data = pd.DataFrame(original_dataframe, columns=attributes).to_numpy()

dimensions = raw_data.shape
m = dimensions[0]
n = dimensions[1]

pd.DataFrame(data=raw_data, index=candidates, columns=attributes)

Unnamed: 0,SR,Econ,Avg,Wkts,Runs,Inns,TBB,5w,4w,BBI,Mat
Andre Russell,16.45,9.51,26.09,11,287,12,181,0,0,2/21,14
Ben Stokes,16.83,11.23,31.5,6,189,6,101,0,0,2/39,9
Chris Morris,15.23,9.27,23.54,13,306,9,198,0,0,3/22,9
Dwayne Bravo,22.45,8.02,30.0,11,330,12,247,0,0,3/33,12
Imran Tahir,14.85,6.7,16.58,26,431,17,386,0,2,4/12,17
Jofra Archer,23.45,6.77,26.45,11,291,11,258,0,0,3/15,11
Kagiso Rabada,11.28,7.83,14.72,25,368,12,282,0,2,4/21,12
Keemo Paul,18.11,8.72,26.33,9,237,8,163,0,0,3/17,8
Lasith Malinga,16.81,9.77,27.38,16,438,12,269,0,2,4/31,12
Moeen Ali,25.0,6.76,28.17,6,169,9,150,0,0,2/18,11


In [6]:
divisors = np.empty(n)
for j in range(n):
    column = raw_data[:,j]
#     print(np.sqrt(column @ column))
    divisors[j] = np.sqrt(column @ column)

raw_data /= divisors
pd.DataFrame(data=raw_data, index=candidates, columns=attributes)

Unnamed: 0,SR,Avg,Runs,Inn,NO,6s,4s,100s,50s,Mat,HS,BF
AB de Villiers,0.264163,0.266301,0.277322,0.26926,0.264135,0.287807,0.222189,0.0,0.354441,0.260889,0.24583,0.259994
Andre Russel,0.35132,0.341432,0.319987,0.26926,0.35218,0.575614,0.222189,0.0,0.283552,0.280957,0.239834,0.22557
Ben Stokes,0.213114,0.123511,0.077173,0.186411,0.264135,0.044278,0.057339,0.0,0.0,0.180615,0.137905,0.089684
Chris Gayle,0.263477,0.245997,0.307438,0.26926,0.088045,0.376363,0.322533,0.0,0.283552,0.260889,0.296795,0.288983
Chris Lynn,0.239548,0.187676,0.254107,0.26926,0.0,0.243529,0.293863,0.0,0.283552,0.260889,0.24583,0.262712
David Warner,0.24677,0.416924,0.434178,0.248548,0.17609,0.23246,0.408542,0.707107,0.567105,0.24082,0.299792,0.43574
Faf Du Plessis,0.211605,0.216897,0.24846,0.248548,0.088045,0.166043,0.258026,0.0,0.212664,0.24082,0.287801,0.290795
Jonny Bairstow,0.269721,0.335166,0.279204,0.207123,0.17609,0.199251,0.344035,0.707107,0.141776,0.200683,0.341763,0.256371
Jos Buttler,0.260218,0.234249,0.195129,0.165699,0.0,0.154973,0.272361,0.0,0.212664,0.160547,0.266815,0.18571
Kane Williamson,0.205841,0.134295,0.097878,0.186411,0.17609,0.055348,0.086009,0.0,0.070888,0.180615,0.209855,0.117767


In [7]:
raw_data *= weights
pd.DataFrame(data=raw_data, index=candidates, columns=attributes)

Unnamed: 0,SR,Avg,Runs,Inn,NO,6s,4s,100s,50s,Mat,HS,BF
AB de Villiers,0.04064,0.037555,0.035554,0.031068,0.027091,0.025829,0.017091,0.0,0.018176,0.010034,0.006303,0.003333
Andre Russel,0.054049,0.048151,0.041024,0.031068,0.036121,0.051658,0.017091,0.0,0.014541,0.010806,0.00615,0.002892
Ben Stokes,0.032787,0.017418,0.009894,0.021509,0.027091,0.003974,0.004411,0.0,0.0,0.006947,0.003536,0.00115
Chris Gayle,0.040535,0.034692,0.039415,0.031068,0.00903,0.033776,0.02481,0.0,0.014541,0.010034,0.00761,0.003705
Chris Lynn,0.036854,0.026467,0.032578,0.031068,0.0,0.021855,0.022605,0.0,0.014541,0.010034,0.006303,0.003368
David Warner,0.037965,0.058797,0.055664,0.028679,0.018061,0.020862,0.031426,0.045327,0.029082,0.009262,0.007687,0.005586
Faf Du Plessis,0.032555,0.030588,0.031854,0.028679,0.00903,0.014901,0.019848,0.0,0.010906,0.009262,0.00738,0.003728
Jonny Bairstow,0.041496,0.047267,0.035795,0.023899,0.018061,0.017882,0.026464,0.045327,0.007271,0.007719,0.008763,0.003287
Jos Buttler,0.040034,0.033035,0.025017,0.019119,0.0,0.013908,0.020951,0.0,0.010906,0.006175,0.006841,0.002381
Kane Williamson,0.031668,0.018939,0.012549,0.021509,0.018061,0.004967,0.006616,0.0,0.003635,0.006947,0.005381,0.00151


In [8]:
a_pos = np.zeros(n)
a_neg = np.zeros(n)
for j in range(n):
    column = raw_data[:,j]
    max_val = np.max(column)
    min_val = np.min(column)
    
    # See if we want to maximize benefit or minimize cost (for PIS)
    if j in benefit_attributes:
        a_pos[j] = max_val
        a_neg[j] = min_val
    else:
        a_pos[j] = min_val
        a_neg[j] = max_val

pd.DataFrame(data=raw_data, index=candidates, columns=attributes)

Unnamed: 0,SR,Avg,Runs,Inn,NO,6s,4s,100s,50s,Mat,HS,BF
AB de Villiers,0.04064,0.037555,0.035554,0.031068,0.027091,0.025829,0.017091,0.0,0.018176,0.010034,0.006303,0.003333
Andre Russel,0.054049,0.048151,0.041024,0.031068,0.036121,0.051658,0.017091,0.0,0.014541,0.010806,0.00615,0.002892
Ben Stokes,0.032787,0.017418,0.009894,0.021509,0.027091,0.003974,0.004411,0.0,0.0,0.006947,0.003536,0.00115
Chris Gayle,0.040535,0.034692,0.039415,0.031068,0.00903,0.033776,0.02481,0.0,0.014541,0.010034,0.00761,0.003705
Chris Lynn,0.036854,0.026467,0.032578,0.031068,0.0,0.021855,0.022605,0.0,0.014541,0.010034,0.006303,0.003368
David Warner,0.037965,0.058797,0.055664,0.028679,0.018061,0.020862,0.031426,0.045327,0.029082,0.009262,0.007687,0.005586
Faf Du Plessis,0.032555,0.030588,0.031854,0.028679,0.00903,0.014901,0.019848,0.0,0.010906,0.009262,0.00738,0.003728
Jonny Bairstow,0.041496,0.047267,0.035795,0.023899,0.018061,0.017882,0.026464,0.045327,0.007271,0.007719,0.008763,0.003287
Jos Buttler,0.040034,0.033035,0.025017,0.019119,0.0,0.013908,0.020951,0.0,0.010906,0.006175,0.006841,0.002381
Kane Williamson,0.031668,0.018939,0.012549,0.021509,0.018061,0.004967,0.006616,0.0,0.003635,0.006947,0.005381,0.00151


In [9]:
sp = np.zeros(m)
sn = np.zeros(m)
cs = np.zeros(m)

for i in range(m):
    diff_pos = raw_data[i] - a_pos
    diff_neg = raw_data[i] - a_neg
    sp[i] = np.sqrt(diff_pos @ diff_pos)
    sn[i] = np.sqrt(diff_neg @ diff_neg)
    cs[i] = sn[i] / (sp[i] + sn[i])

pd.DataFrame(data=zip(sp, sn, cs), index=candidates, columns=["$S^*$", "$S^-$", "$C^*$"])

Unnamed: 0,$S^*$,$S^-$,$C^*$
AB de Villiers,0.070196,0.055112,0.439813
Andre Russel,0.056888,0.081165,0.587927
Ben Stokes,0.106526,0.027294,0.203959
Chris Gayle,0.076169,0.055195,0.420169
Chris Lynn,0.090296,0.04084,0.31143
David Warner,0.051679,0.090778,0.637231
Faf Du Plessis,0.088862,0.036606,0.291756
Jonny Bairstow,0.062814,0.069648,0.525796
Jos Buttler,0.09581,0.032634,0.254072
Kane Williamson,0.105748,0.01912,0.15312


In [10]:
def rank_according_to(data):
    ranks = (rankdata(data) - 1).astype(int)
    storage = np.zeros_like(candidates)
    storage[ranks] = candidates
    return storage[::-1]

In [11]:
cs_order = rank_according_to(cs)
sp_order = rank_according_to(sp)
sn_order = rank_according_to(sn)

pd.DataFrame(data=zip(cs_order, sp_order, sn_order), index=range(1, m + 1), columns=["$C^*$", "$S^*$", "$S^-$"])

Unnamed: 0,$C^*$,$S^*$,$S^-$
1,David Warner,Ben Stokes,David Warner
2,Andre Russel,Kane Williamson,Andre Russel
3,Jonny Bairstow,Jos Buttler,Jonny Bairstow
4,Kieron Pollard,Shane Watson,Kieron Pollard
5,AB de Villiers,Moeen Ali,Marcus Stoinis
6,Chris Gayle,Steve Smith,Chris Gayle
7,Marcus Stoinis,Chris Lynn,AB de Villiers
8,Quinton de Kock,Faf Du Plessis,Quinton de Kock
9,Chris Lynn,Marcus Stoinis,Shane Watson
10,Shane Watson,Quinton de Kock,Chris Lynn


In [12]:
print("The best candidate/alternative according to C* is " + cs_order[0])
print("The preferences in descending order are " + ", ".join(cs_order) + ".")

The best candidate/alternative according to C* is David Warner
The preferences in descending order are David Warner, Andre Russel, Jonny Bairstow, Kieron Pollard, AB de Villiers, Chris Gayle, Marcus Stoinis, Quinton de Kock, Chris Lynn, Shane Watson, Faf Du Plessis, Steve Smith, Jos Buttler, Moeen Ali, Ben Stokes, Kane Williamson.
