# TOPSIS Ranking

In [1]:
import numpy as np               # for linear algebra
import pandas as pd              # for tabular output
from scipy.stats import rankdata # for ranking the candidates

## Step 0 - Obtaining and processing the data

The data from the Excel sheet is saved into CSV files and stored in the `data` folder at the root of the project. The criteria, their rankings, and the players' scores on those criteria are loaded into NumPy arrays and processed for the next step.

Note that an attribute can be a beneficial attribute (in which case we want to maximize its contribution) or a cost attribute (which we want to minimize). We call the set of beneficial attributes $J_1$ and the set of cost attributes $J_2 = J_1^C$.

In [2]:
# Each dataset is described by two CSV paths: the criteria table ('weights')
# and the per-player statistics table ('scores').
bowlers_data = dict(
    weights='../data/bowling_criteria.csv',
    scores='../data/bowlers.csv',
)
batsmen_data = dict(
    weights='../data/batting_criteria.csv',
    scores='../data/batsmen.csv',
)

# Dataset selected for this run; switch to `batsmen_data` to rank batsmen.
data = bowlers_data

In [3]:
# Read the criteria table for the selected dataset and display it.
criteria_csv_path = data['weights']
attributes_data = pd.read_csv(criteria_csv_path)
attributes_data

Unnamed: 0,Name,Ranking,Ideally
0,SR,1,Lower
1,Econ,2,Lower
2,Avg,3,Lower
3,Wkts,4,Higher
4,Runs,5,Lower
5,Inns,6,Higher
6,TBB,7,Higher
7,4w,8,Higher
8,Mat,9,Higher


In [4]:
# Split the criteria table into the pieces the rest of the notebook uses:
#   attributes          - criterion names, in table order
#   ranks               - the 'Ranking' column as a float array
#   n                   - number of criteria
#   benefit_attributes  - positions of the criteria marked 'Higher'
attributes = attributes_data['Name'].tolist()
ranks = attributes_data['Ranking'].to_numpy(dtype=float)
n = len(attributes)

# Benefit criteria are recorded by column *position* (via enumerate), not by
# DataFrame index label: the PIS/NIS step tests membership with positional
# column indices, and labels only coincide with positions for a default index.
benefit_attributes = {
    position
    for position, ideal in enumerate(attributes_data['Ideally'])
    if ideal == 'Higher'
}

In [5]:
# A criterion ranked r out of n gets weight 2 * (n + 1 - r) / (n * (n + 1));
# these weights sum to 1 when the ranks are exactly 1..n.
rank_scores = 2 * (n + 1 - ranks)
normalizer = n * (n + 1)
weights = rank_scores / normalizer
pd.DataFrame(data=weights, index=attributes, columns=['Weight'])

Unnamed: 0,Weight
SR,0.2
Econ,0.177778
Avg,0.155556
Wkts,0.133333
Runs,0.111111
Inns,0.088889
TBB,0.066667
4w,0.044444
Mat,0.022222


In [6]:
# Load the per-player statistics and extract the criteria columns as a float
# matrix with one row per candidate and one column per criterion.
original_dataframe = pd.read_csv(data['scores'])
candidates = original_dataframe['Name'].to_numpy()

# Select the criteria by label so that a column missing from the CSV raises a
# KeyError here instead of silently becoming a column of NaN. The explicit
# float copy keeps the later in-place arithmetic from failing on an
# integer-only table and from writing back into `original_dataframe`.
raw_data = original_dataframe[attributes].to_numpy(dtype=float, copy=True)

dimensions = raw_data.shape
m, n = dimensions  # m candidates (rows), n criteria (columns)

pd.DataFrame(data=raw_data, index=candidates, columns=attributes)

Unnamed: 0,SR,Econ,Avg,Wkts,Runs,Inns,TBB,4w,Mat
Andre Russell,16.45,9.51,26.09,11.0,287.0,12.0,181.0,0.0,14.0
Ben Stokes,16.83,11.23,31.5,6.0,189.0,6.0,101.0,0.0,9.0
Chris Morris,15.23,9.27,23.54,13.0,306.0,9.0,198.0,0.0,9.0
Dwayne Bravo,22.45,8.02,30.0,11.0,330.0,12.0,247.0,0.0,12.0
Imran Tahir,14.85,6.7,16.58,26.0,431.0,17.0,386.0,2.0,17.0
Jofra Archer,23.45,6.77,26.45,11.0,291.0,11.0,258.0,0.0,11.0
Kagiso Rabada,11.28,7.83,14.72,25.0,368.0,12.0,282.0,2.0,12.0
Keemo Paul,18.11,8.72,26.33,9.0,237.0,8.0,163.0,0.0,8.0
Lasith Malinga,16.81,9.77,27.38,16.0,438.0,12.0,269.0,2.0,12.0
Moeen Ali,25.0,6.76,28.17,6.0,169.0,9.0,150.0,0.0,11.0


## Step 1 - Normalizing the ratings

$$r_{ij}=\frac{x_{ij}}{\sqrt{\sum_{i = 1}^{m} x_{ij}^2}}$$

where $i = 1, 2, \ldots, m$ and $j = 1, 2, \ldots, n$.

In [7]:
# Euclidean norm of every criterion column (square root of the column's sum
# of squares).
divisors = np.sqrt((raw_data * raw_data).sum(axis=0))

# A criterion on which every candidate scores 0 has norm 0; dividing by it
# would fill the column with NaN, which then spreads to every distance and
# score computed afterwards. Dividing by 1 instead leaves such a column at 0.
divisors[divisors == 0] = 1.0

# Normalize in place: each column of `raw_data` is divided by its norm.
raw_data /= divisors
pd.DataFrame(data=raw_data, index=candidates, columns=attributes)

Unnamed: 0,SR,Econ,Avg,Wkts,Runs,Inns,TBB,4w,Mat
Andre Russell,0.212905,0.293421,0.249384,0.207142,0.239655,0.28387,0.197151,0.0,0.319173
Ben Stokes,0.217824,0.34649,0.301096,0.112987,0.157822,0.141935,0.110012,0.0,0.205182
Chris Morris,0.197115,0.286016,0.22501,0.244804,0.255521,0.212902,0.215668,0.0,0.205182
Dwayne Bravo,0.290561,0.247449,0.286758,0.207142,0.275561,0.28387,0.26904,0.0,0.273576
Imran Tahir,0.192197,0.206721,0.158482,0.489608,0.3599,0.402149,0.420443,0.534522,0.387567
Jofra Archer,0.303503,0.208881,0.252825,0.207142,0.242995,0.260214,0.281021,0.0,0.250778
Kagiso Rabada,0.145992,0.241586,0.140703,0.470777,0.307293,0.28387,0.307163,0.534522,0.273576
Keemo Paul,0.23439,0.269046,0.251678,0.16948,0.197903,0.189246,0.177545,0.0,0.182384
Lasith Malinga,0.217565,0.301443,0.261715,0.301297,0.365745,0.28387,0.293003,0.534522,0.273576
Moeen Ali,0.323564,0.208573,0.269266,0.112987,0.141121,0.212902,0.163385,0.0,0.250778


## Step 2 - Calculating the Weighted Normalized Ratings

$$v_{ij} = w_j r_{ij}$$

where $i = 1, 2, \ldots, m$ and $j = 1, 2, \ldots, n$.

In [8]:
# Scale every criterion column by its weight. The slice assignment writes the
# products back into the existing array, so `raw_data` is updated in place.
# NOTE: re-running this cell applies the weights a second time.
raw_data[...] = raw_data * weights
pd.DataFrame(data=raw_data, index=candidates, columns=attributes)

Unnamed: 0,SR,Econ,Avg,Wkts,Runs,Inns,TBB,4w,Mat
Andre Russell,0.042581,0.052164,0.038793,0.027619,0.026628,0.025233,0.013143,0.0,0.007093
Ben Stokes,0.043565,0.061598,0.046837,0.015065,0.017536,0.012616,0.007334,0.0,0.00456
Chris Morris,0.039423,0.050847,0.035001,0.032641,0.028391,0.018925,0.014378,0.0,0.00456
Dwayne Bravo,0.058112,0.043991,0.044607,0.027619,0.030618,0.025233,0.017936,0.0,0.006079
Imran Tahir,0.038439,0.03675,0.024653,0.065281,0.039989,0.035747,0.02803,0.023757,0.008613
Jofra Archer,0.060701,0.037134,0.039328,0.027619,0.026999,0.02313,0.018735,0.0,0.005573
Kagiso Rabada,0.029198,0.042949,0.021887,0.06277,0.034144,0.025233,0.020478,0.023757,0.006079
Keemo Paul,0.046878,0.04783,0.03915,0.022597,0.021989,0.016822,0.011836,0.0,0.004053
Lasith Malinga,0.043513,0.05359,0.040711,0.040173,0.040638,0.025233,0.019534,0.023757,0.006079
Moeen Ali,0.064713,0.03708,0.041886,0.015065,0.01568,0.018925,0.010892,0.0,0.005573


## Step 3 - Identifying the Positive Ideal Solution, PIS ($A^*$), and the Negative Ideal Solution, NIS ($A^-$)

$$
A^* = \left\{v_1^*, v_2^*, \ldots, v_n^*\right\}
$$
$$
A^- = \left\{v_1^-, v_2^-, \ldots, v_n^-\right\}
$$

And we define

$$
v_j^* = \max_i{(v_{ij})}, \text{ if } j \in J_1
$$
$$
v_j^* = \min_i{(v_{ij})}, \text{ if } j \in J_2
$$
$$
v_j^- = \min_i{(v_{ij})}, \text{ if } j \in J_1
$$
$$
v_j^- = \max_i{(v_{ij})}, \text{ if } j \in J_2
$$

where $i = 1, 2, \ldots, m$ and $j = 1, 2, \ldots, n$.

In [9]:
# Column-wise extremes of the ratings matrix.
column_highs = raw_data.max(axis=0)
column_lows = raw_data.min(axis=0)

# True at the positions of benefit criteria, False at cost criteria.
is_benefit = np.array([j in benefit_attributes for j in range(n)], dtype=bool)

# PIS: the column maximum for a benefit criterion, the minimum for a cost.
a_pos = column_lows.copy()
a_pos[is_benefit] = column_highs[is_benefit]

# NIS: the opposite choice in every column.
a_neg = column_highs.copy()
a_neg[is_benefit] = column_lows[is_benefit]

pd.DataFrame(data=[a_pos, a_neg], index=["$A^*$", "$A^-$"], columns=attributes)

Unnamed: 0,SR,Econ,Avg,Wkts,Runs,Inns,TBB,4w,Mat
$A^*$,0.029198,0.034447,0.021887,0.065281,0.015123,0.035747,0.02803,0.023757,0.008613
$A^-$,0.068854,0.061598,0.051595,0.012554,0.040638,0.010514,0.007334,0.0,0.002533


## Steps 4 and 5 - Calculating Separation Measures and Similarities to PIS

The separation (distance) of each alternative from the ideal solutions is measured by the $n$-dimensional Euclidean distance. The separations from the PIS $A^*$ and the NIS $A^-$ are $S^*$ and $S^-$ respectively.

$$
S_i^* = \sqrt{\sum_{j = 1}^n \left(v_{ij} - v^*_j\right)^2}
$$
$$
S_i^- = \sqrt{\sum_{j = 1}^n \left(v_{ij} - v^-_j\right)^2}
$$

where $i = 1, 2, \ldots, m$ and $j = 1, 2, \ldots, n$.

We also calculate

$$
C^*_i = \frac{S_i^-}{S_i^* + S_i^-},\text{ where }i = 1, 2, \ldots, m
$$

In [10]:
# Euclidean distance of every candidate's ratings from the PIS (sp) and from
# the NIS (sn).
sp = np.zeros(m)
sn = np.zeros(m)
for row_idx, ratings in enumerate(raw_data):
    gap_to_pis = ratings - a_pos
    gap_to_nis = ratings - a_neg
    sp[row_idx] = np.sqrt(gap_to_pis @ gap_to_pis)
    sn[row_idx] = np.sqrt(gap_to_nis @ gap_to_nis)

# Relative closeness to the PIS: 1 when a candidate sits on the PIS (sp == 0),
# 0 when it sits on the NIS (sn == 0).
cs = sn / (sp + sn)

pd.DataFrame(data=zip(sp, sn, cs), index=candidates, columns=["$S^*$", "$S^-$", "$C^*$"])

Unnamed: 0,$S^*$,$S^-$,$C^*$
Andre Russell,0.056819,0.040468,0.415961
Ben Stokes,0.075085,0.034796,0.316672
Chris Morris,0.053264,0.043989,0.452315
Dwayne Bravo,0.06233,0.033812,0.351688
Imran Tahir,0.02677,0.081947,0.753762
Jofra Archer,0.060683,0.039074,0.391692
Kagiso Rabada,0.024786,0.079581,0.762512
Keemo Paul,0.062163,0.036585,0.370485
Lasith Malinga,0.048952,0.050299,0.506789
Moeen Ali,0.073078,0.037874,0.341355


## Step 6 - Ranking the candidates/alternatives

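We choose the candidate with the maximum $C^*$, or rank all the alternatives in descending order of their $C^*$ values. The same ranking can be produced from the $S^*$ and $S^-$ values; note that a smaller $S^*$ (closer to the PIS) is better, so that ordering is ascending, while a larger $S^-$ (farther from the NIS) is better.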

In [11]:
def rank_according_to(data, names=None):
    """Return candidate names ordered from the highest score to the lowest.

    Parameters
    ----------
    data : array-like of numbers
        One score per candidate, in the same order as the names.
    names : array-like, optional
        Labels to order. Defaults to the notebook-level ``candidates`` array.

    Returns
    -------
    numpy.ndarray
        The names sorted by descending score. Candidates with equal scores
        keep their original relative order.
    """
    labels = np.asarray(candidates if names is None else names)

    # Rank the negated scores so that position 0 holds the highest score.
    # method='ordinal' gives every entry a distinct integer rank. The default
    # 'average' method gives tied entries the same fractional rank; once
    # truncated to int, tied candidates would overwrite each other and leave
    # unfilled slots in the result.
    positions = rankdata(-np.asarray(data), method='ordinal').astype(int) - 1

    ordered = np.empty_like(labels)
    ordered[positions] = labels
    return ordered

In [12]:
# Order the candidates by each of the three measures (highest value first).
cs_order, sp_order, sn_order = (
    rank_according_to(measure) for measure in (cs, sp, sn)
)

# For S* a smaller distance is better, so that column is shown reversed
# (ascending); C* and S^- are shown in descending order.
pd.DataFrame(
    {
        "$C^*$": cs_order,
        "$S^*$": sp_order[::-1],
        "$S^-$": sn_order,
    },
    index=range(1, m + 1),
)

Unnamed: 0,$C^*$,$S^*$,$S^-$
1,Kagiso Rabada,Kagiso Rabada,Imran Tahir
2,Imran Tahir,Imran Tahir,Kagiso Rabada
3,Rashid Khan,Rashid Khan,Rashid Khan
4,Lasith Malinga,Lasith Malinga,Lasith Malinga
5,Chris Morris,Chris Morris,Chris Morris
6,Andre Russell,Andre Russell,Mohammad Nabi
7,Mohammad Nabi,Jofra Archer,Andre Russell
8,Jofra Archer,Mohammad Nabi,Jofra Archer
9,Keemo Paul,Keemo Paul,Moeen Ali
10,Dwayne Bravo,Dwayne Bravo,Keemo Paul


In [13]:
# Report the top candidate, then the full preference order by C*.
best_candidate = cs_order[0]
print("The best candidate/alternative according to C* is " + best_candidate)

preference_list = ", ".join(cs_order)
print("The preferences in descending order are " + preference_list + ".")

The best candidate/alternative according to C* is Kagiso Rabada
The preferences in descending order are Kagiso Rabada, Imran Tahir, Rashid Khan, Lasith Malinga, Chris Morris, Andre Russell, Mohammad Nabi, Jofra Archer, Keemo Paul, Dwayne Bravo, Moeen Ali, Sam Curran, Ben Stokes, Sunil Narine, Trent Boult.
