# Ranking
## Basic Ranking

`Series.rank()` computes numerical data ranks (1 through n) along an axis.

By default, equal values are assigned a rank that is the average of the ranks of those values.

In [1]:
import pandas as pd

# Sample sales data used by every example below.
# Ben and Carlos deliberately share the same total (75000) so that the
# tie-handling behaviour of rank() is visible in the results.
# NOTE: the employee names must match the recorded outputs ("Ben", not "Bob").
data = {
    "employee": ["Anna", "Ben", "Carlos", "Diana", "Eva"],
    "total_sales": [50000, 75000, 75000, 90000, 60000]
}

df = pd.DataFrame(data)

print(df)

  employee  total_sales
0     Anna        50000
1      Ben        75000
2   Carlos        75000
3    Diana        90000
4      Eva        60000


### Rank by Sales (Ascending by Default)

In [2]:
# rank() is ascending by default, so the smallest total_sales gets rank 1.0;
# tied values share the average of the positions they occupy.
df = df.assign(sales_rank=df["total_sales"].rank())

print(df)

  employee  total_sales  sales_rank
0     Anna        50000         1.0
1      Ben        75000         3.5
2   Carlos        75000         3.5
3    Diana        90000         5.0
4      Eva        60000         2.0


## Ranking Highest First
In most real use cases the highest value should get rank 1; pass `ascending=False` to rank that way.

In [3]:
# ascending=False reverses the ordering: the largest total_sales gets rank 1.0.
df = df.assign(sales_rank_desc=df["total_sales"].rank(ascending=False))

print(df)

  employee  total_sales  sales_rank  sales_rank_desc
0     Anna        50000         1.0              5.0
1      Ben        75000         3.5              2.5
2   Carlos        75000         3.5              2.5
3    Diana        90000         5.0              1.0
4      Eva        60000         2.0              4.0


## Handling Ties

Pandas offers several tie-breaking methods: ‘average’, ‘min’, ‘max’, ‘first’, and ‘dense’. The default is ‘average’.

In [4]:
# method="average" (the default tie rule): tied rows share the mean of the
# positions they occupy, ranked here from highest sales to lowest.
df = df.assign(
    rank_average=df["total_sales"].rank(method="average", ascending=False)
)

df

Unnamed: 0,employee,total_sales,sales_rank,sales_rank_desc,rank_average
0,Anna,50000,1.0,5.0,5.0
1,Ben,75000,3.5,2.5,2.5
2,Carlos,75000,3.5,2.5,2.5
3,Diana,90000,5.0,1.0,1.0
4,Eva,60000,2.0,4.0,4.0


In [6]:
# method="min": every row in a tied group gets the lowest position of that
# group, ranked from highest sales to lowest.
df = df.assign(
    rank_min=df["total_sales"].rank(method="min", ascending=False)
)

df

Unnamed: 0,employee,total_sales,sales_rank,sales_rank_desc,rank_average,rank_min
0,Anna,50000,1.0,5.0,5.0,5.0
1,Ben,75000,3.5,2.5,2.5,2.0
2,Carlos,75000,3.5,2.5,2.5,2.0
3,Diana,90000,5.0,1.0,1.0,1.0
4,Eva,60000,2.0,4.0,4.0,4.0


In [7]:
# method="max": every row in a tied group gets the highest position of that
# group, ranked from highest sales to lowest.
df = df.assign(
    rank_max=df["total_sales"].rank(method="max", ascending=False)
)

print(df)

  employee  total_sales  sales_rank  sales_rank_desc  rank_average  rank_min  \
0     Anna        50000         1.0              5.0           5.0       5.0   
1      Ben        75000         3.5              2.5           2.5       2.0   
2   Carlos        75000         3.5              2.5           2.5       2.0   
3    Diana        90000         5.0              1.0           1.0       1.0   
4      Eva        60000         2.0              4.0           4.0       4.0   

   rank_max  
0       5.0  
1       3.0  
2       3.0  
3       1.0  
4       4.0  


In [8]:
# method="dense" (very popular in analytics): ties are handled like "min",
# but the rank always increases by exactly 1 from one group to the next,
# so no rank numbers are skipped after a tie.
df = df.assign(
    rank_dense=df["total_sales"].rank(method="dense", ascending=False)
)

print(df)

  employee  total_sales  sales_rank  sales_rank_desc  rank_average  rank_min  \
0     Anna        50000         1.0              5.0           5.0       5.0   
1      Ben        75000         3.5              2.5           2.5       2.0   
2   Carlos        75000         3.5              2.5           2.5       2.0   
3    Diana        90000         5.0              1.0           1.0       1.0   
4      Eva        60000         2.0              4.0           4.0       4.0   

   rank_max  rank_dense  
0       5.0         4.0  
1       3.0         2.0  
2       3.0         2.0  
3       1.0         1.0  
4       4.0         3.0  


## Creating Percentile Rank
Sometimes ranking position is less useful than percentile.

In [11]:
# pct=True reports the (ascending, average-tie) rank in percentile form,
# i.e. as a fraction between 0 and 1 rather than a position.
df = df.assign(sales_percentile=df["total_sales"].rank(pct=True))

# Show only the columns relevant to the percentile example.
print(df.loc[:, ["employee", "total_sales", "sales_percentile"]])

  employee  total_sales  sales_percentile
0     Anna        50000               0.2
1      Ben        75000               0.7
2   Carlos        75000               0.7
3    Diana        90000               1.0
4      Eva        60000               0.4
